Spaces:
Paused
Paused
| """Functions to convert NetworkX graphs to and from common data containers | |
| like numpy arrays, scipy sparse arrays, and pandas DataFrames. | |
| The preferred way of converting data to a NetworkX graph is through the | |
| graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph` | |
| function which attempts to guess the input type and convert it automatically. | |
| Examples | |
| -------- | |
| Create a 10 node random graph from a numpy array | |
| import numpy as np | |
| rng = np.random.default_rng() | |
| a = rng.integers(low=0, high=2, size=(10, 10)) | |
| DG = nx.from_numpy_array(a, create_using=nx.DiGraph) | |
| or equivalently: | |
| DG = nx.DiGraph(a) | |
| which calls `from_numpy_array` internally based on the type of ``a``. | |
| See Also | |
| -------- | |
| nx_agraph, nx_pydot | |
| """ | |
| import itertools | |
| from collections import defaultdict | |
| import networkx as nx | |
| from networkx.utils import not_implemented_for | |
| __all__ = [ | |
| "from_pandas_adjacency", | |
| "to_pandas_adjacency", | |
| "from_pandas_edgelist", | |
| "to_pandas_edgelist", | |
| "from_scipy_sparse_array", | |
| "to_scipy_sparse_array", | |
| "from_numpy_array", | |
| "to_numpy_array", | |
| ] | |
| def to_pandas_adjacency( | |
| G, | |
| nodelist=None, | |
| dtype=None, | |
| order=None, | |
| multigraph_weight=sum, | |
| weight="weight", | |
| nonedge=0.0, | |
| ): | |
| """Returns the graph adjacency matrix as a Pandas DataFrame. | |
| Parameters | |
| ---------- | |
| G : graph | |
| The NetworkX graph used to construct the Pandas DataFrame. | |
| nodelist : list, optional | |
| The rows and columns are ordered according to the nodes in `nodelist`. | |
| If `nodelist` is None, then the ordering is produced by G.nodes(). | |
| multigraph_weight : {sum, min, max}, optional | |
| An operator that determines how weights in multigraphs are handled. | |
| The default is to sum the weights of the multiple edges. | |
| weight : string or None, optional | |
| The edge attribute that holds the numerical value used for | |
| the edge weight. If an edge does not have that attribute, then the | |
| value 1 is used instead. | |
| nonedge : float, optional | |
| The matrix values corresponding to nonedges are typically set to zero. | |
| However, this could be undesirable if there are matrix values | |
| corresponding to actual edges that also have the value zero. If so, | |
| one might prefer nonedges to have some other value, such as nan. | |
| Returns | |
| ------- | |
| df : Pandas DataFrame | |
| Graph adjacency matrix | |
| Notes | |
| ----- | |
| For directed graphs, entry i,j corresponds to an edge from i to j. | |
| The DataFrame entries are assigned to the weight edge attribute. When | |
| an edge does not have a weight attribute, the value of the entry is set to | |
| the number 1. For multiple (parallel) edges, the values of the entries | |
| are determined by the 'multigraph_weight' parameter. The default is to | |
| sum the weight attributes for each of the parallel edges. | |
| When `nodelist` does not contain every node in `G`, the matrix is built | |
| from the subgraph of `G` that is induced by the nodes in `nodelist`. | |
| The convention used for self-loop edges in graphs is to assign the | |
| diagonal matrix entry value to the weight attribute of the edge | |
| (or the number 1 if the edge has no weight attribute). If the | |
| alternate convention of doubling the edge weight is desired the | |
| resulting Pandas DataFrame can be modified as follows: | |
| >>> import pandas as pd | |
| >>> pd.options.display.max_columns = 20 | |
| >>> import numpy as np | |
| >>> G = nx.Graph([(1, 1)]) | |
| >>> df = nx.to_pandas_adjacency(G, dtype=int) | |
| >>> df | |
| 1 | |
| 1 1 | |
| >>> df.values[np.diag_indices_from(df)] *= 2 | |
| >>> df | |
| 1 | |
| 1 2 | |
| Examples | |
| -------- | |
| >>> G = nx.MultiDiGraph() | |
| >>> G.add_edge(0, 1, weight=2) | |
| 0 | |
| >>> G.add_edge(1, 0) | |
| 0 | |
| >>> G.add_edge(2, 2, weight=3) | |
| 0 | |
| >>> G.add_edge(2, 2) | |
| 1 | |
| >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int) | |
| 0 1 2 | |
| 0 0 2 0 | |
| 1 1 0 0 | |
| 2 0 0 4 | |
| """ | |
| import pandas as pd | |
| M = to_numpy_array( | |
| G, | |
| nodelist=nodelist, | |
| dtype=dtype, | |
| order=order, | |
| multigraph_weight=multigraph_weight, | |
| weight=weight, | |
| nonedge=nonedge, | |
| ) | |
| if nodelist is None: | |
| nodelist = list(G) | |
| return pd.DataFrame(data=M, index=nodelist, columns=nodelist) | |
| def from_pandas_adjacency(df, create_using=None): | |
| r"""Returns a graph from Pandas DataFrame. | |
| The Pandas DataFrame is interpreted as an adjacency matrix for the graph. | |
| Parameters | |
| ---------- | |
| df : Pandas DataFrame | |
| An adjacency matrix representation of a graph | |
| create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
| Graph type to create. If graph instance, then cleared before populated. | |
| Notes | |
| ----- | |
| For directed graphs, explicitly mention create_using=nx.DiGraph, | |
| and entry i,j of df corresponds to an edge from i to j. | |
| If `df` has a single data type for each entry it will be converted to an | |
| appropriate Python data type. | |
| If you have node attributes stored in a separate dataframe `df_nodes`, | |
| you can load those attributes to the graph `G` using the following code: | |
| ``` | |
| df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]}) | |
| G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows()) | |
| ``` | |
| If `df` has a user-specified compound data type the names | |
| of the data fields will be used as attribute keys in the resulting | |
| NetworkX graph. | |
| See Also | |
| -------- | |
| to_pandas_adjacency | |
| Examples | |
| -------- | |
| Simple integer weights on edges: | |
| >>> import pandas as pd | |
| >>> pd.options.display.max_columns = 20 | |
| >>> df = pd.DataFrame([[1, 1], [2, 1]]) | |
| >>> df | |
| 0 1 | |
| 0 1 1 | |
| 1 2 1 | |
| >>> G = nx.from_pandas_adjacency(df) | |
| >>> G.name = "Graph from pandas adjacency matrix" | |
| >>> print(G) | |
| Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges | |
| """ | |
| try: | |
| df = df[df.index] | |
| except Exception as err: | |
| missing = list(set(df.index).difference(set(df.columns))) | |
| msg = f"{missing} not in columns" | |
| raise nx.NetworkXError("Columns must match Indices.", msg) from err | |
| A = df.values | |
| G = from_numpy_array(A, create_using=create_using) | |
| nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False) | |
| return G | |
| def to_pandas_edgelist( | |
| G, | |
| source="source", | |
| target="target", | |
| nodelist=None, | |
| dtype=None, | |
| edge_key=None, | |
| ): | |
| """Returns the graph edge list as a Pandas DataFrame. | |
| Parameters | |
| ---------- | |
| G : graph | |
| The NetworkX graph used to construct the Pandas DataFrame. | |
| source : str or int, optional | |
| A valid column name (string or integer) for the source nodes (for the | |
| directed case). | |
| target : str or int, optional | |
| A valid column name (string or integer) for the target nodes (for the | |
| directed case). | |
| nodelist : list, optional | |
| Use only nodes specified in nodelist | |
| dtype : dtype, default None | |
| Use to create the DataFrame. Data type to force. | |
| Only a single dtype is allowed. If None, infer. | |
| edge_key : str or int or None, optional (default=None) | |
| A valid column name (string or integer) for the edge keys (for the | |
| multigraph case). If None, edge keys are not stored in the DataFrame. | |
| Returns | |
| ------- | |
| df : Pandas DataFrame | |
| Graph edge list | |
| Examples | |
| -------- | |
| >>> G = nx.Graph( | |
| ... [ | |
| ... ("A", "B", {"cost": 1, "weight": 7}), | |
| ... ("C", "E", {"cost": 9, "weight": 10}), | |
| ... ] | |
| ... ) | |
| >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"]) | |
| >>> df[["source", "target", "cost", "weight"]] | |
| source target cost weight | |
| 0 A B 1 7 | |
| 1 C E 9 10 | |
| >>> G = nx.MultiGraph([('A', 'B', {'cost': 1}), ('A', 'B', {'cost': 9})]) | |
| >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'], edge_key='ekey') | |
| >>> df[['source', 'target', 'cost', 'ekey']] | |
| source target cost ekey | |
| 0 A B 1 0 | |
| 1 A B 9 1 | |
| """ | |
| import pandas as pd | |
| if nodelist is None: | |
| edgelist = G.edges(data=True) | |
| else: | |
| edgelist = G.edges(nodelist, data=True) | |
| source_nodes = [s for s, _, _ in edgelist] | |
| target_nodes = [t for _, t, _ in edgelist] | |
| all_attrs = set().union(*(d.keys() for _, _, d in edgelist)) | |
| if source in all_attrs: | |
| raise nx.NetworkXError(f"Source name {source!r} is an edge attr name") | |
| if target in all_attrs: | |
| raise nx.NetworkXError(f"Target name {target!r} is an edge attr name") | |
| nan = float("nan") | |
| edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs} | |
| if G.is_multigraph() and edge_key is not None: | |
| if edge_key in all_attrs: | |
| raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name") | |
| edge_keys = [k for _, _, k in G.edges(keys=True)] | |
| edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys} | |
| else: | |
| edgelistdict = {source: source_nodes, target: target_nodes} | |
| edgelistdict.update(edge_attr) | |
| return pd.DataFrame(edgelistdict, dtype=dtype) | |
| def from_pandas_edgelist( | |
| df, | |
| source="source", | |
| target="target", | |
| edge_attr=None, | |
| create_using=None, | |
| edge_key=None, | |
| ): | |
| """Returns a graph from Pandas DataFrame containing an edge list. | |
| The Pandas DataFrame should contain at least two columns of node names and | |
| zero or more columns of edge attributes. Each row will be processed as one | |
| edge instance. | |
| Note: This function iterates over DataFrame.values, which is not | |
| guaranteed to retain the data type across columns in the row. This is only | |
| a problem if your row is entirely numeric and a mix of ints and floats. In | |
| that case, all values will be returned as floats. See the | |
| DataFrame.iterrows documentation for an example. | |
| Parameters | |
| ---------- | |
| df : Pandas DataFrame | |
| An edge list representation of a graph | |
| source : str or int | |
| A valid column name (string or integer) for the source nodes (for the | |
| directed case). | |
| target : str or int | |
| A valid column name (string or integer) for the target nodes (for the | |
| directed case). | |
| edge_attr : str or int, iterable, True, or None | |
| A valid column name (str or int) or iterable of column names that are | |
| used to retrieve items and add them to the graph as edge attributes. | |
| If `True`, all of the remaining columns will be added. | |
| If `None`, no edge attributes are added to the graph. | |
| create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
| Graph type to create. If graph instance, then cleared before populated. | |
| edge_key : str or None, optional (default=None) | |
| A valid column name for the edge keys (for a MultiGraph). The values in | |
| this column are used for the edge keys when adding edges if create_using | |
| is a multigraph. | |
| If you have node attributes stored in a separate dataframe `df_nodes`, | |
| you can load those attributes to the graph `G` using the following code: | |
| ``` | |
| df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]}) | |
| G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows()) | |
| ``` | |
| See Also | |
| -------- | |
| to_pandas_edgelist | |
| Examples | |
| -------- | |
| Simple integer weights on edges: | |
| >>> import pandas as pd | |
| >>> pd.options.display.max_columns = 20 | |
| >>> import numpy as np | |
| >>> rng = np.random.RandomState(seed=5) | |
| >>> ints = rng.randint(1, 11, size=(3, 2)) | |
| >>> a = ["A", "B", "C"] | |
| >>> b = ["D", "A", "E"] | |
| >>> df = pd.DataFrame(ints, columns=["weight", "cost"]) | |
| >>> df[0] = a | |
| >>> df["b"] = b | |
| >>> df[["weight", "cost", 0, "b"]] | |
| weight cost 0 b | |
| 0 4 7 A D | |
| 1 7 1 B A | |
| 2 10 9 C E | |
| >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"]) | |
| >>> G["E"]["C"]["weight"] | |
| 10 | |
| >>> G["E"]["C"]["cost"] | |
| 9 | |
| >>> edges = pd.DataFrame( | |
| ... { | |
| ... "source": [0, 1, 2], | |
| ... "target": [2, 2, 3], | |
| ... "weight": [3, 4, 5], | |
| ... "color": ["red", "blue", "blue"], | |
| ... } | |
| ... ) | |
| >>> G = nx.from_pandas_edgelist(edges, edge_attr=True) | |
| >>> G[0][2]["color"] | |
| 'red' | |
| Build multigraph with custom keys: | |
| >>> edges = pd.DataFrame( | |
| ... { | |
| ... "source": [0, 1, 2, 0], | |
| ... "target": [2, 2, 3, 2], | |
| ... "my_edge_key": ["A", "B", "C", "D"], | |
| ... "weight": [3, 4, 5, 6], | |
| ... "color": ["red", "blue", "blue", "blue"], | |
| ... } | |
| ... ) | |
| >>> G = nx.from_pandas_edgelist( | |
| ... edges, | |
| ... edge_key="my_edge_key", | |
| ... edge_attr=["weight", "color"], | |
| ... create_using=nx.MultiGraph(), | |
| ... ) | |
| >>> G[0][2] | |
| AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}}) | |
| """ | |
| g = nx.empty_graph(0, create_using) | |
| if edge_attr is None: | |
| g.add_edges_from(zip(df[source], df[target])) | |
| return g | |
| reserved_columns = [source, target] | |
| # Additional columns requested | |
| attr_col_headings = [] | |
| attribute_data = [] | |
| if edge_attr is True: | |
| attr_col_headings = [c for c in df.columns if c not in reserved_columns] | |
| elif isinstance(edge_attr, (list, tuple)): | |
| attr_col_headings = edge_attr | |
| else: | |
| attr_col_headings = [edge_attr] | |
| if len(attr_col_headings) == 0: | |
| raise nx.NetworkXError( | |
| f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}" | |
| ) | |
| try: | |
| attribute_data = zip(*[df[col] for col in attr_col_headings]) | |
| except (KeyError, TypeError) as err: | |
| msg = f"Invalid edge_attr argument: {edge_attr}" | |
| raise nx.NetworkXError(msg) from err | |
| if g.is_multigraph(): | |
| # => append the edge keys from the df to the bundled data | |
| if edge_key is not None: | |
| try: | |
| multigraph_edge_keys = df[edge_key] | |
| attribute_data = zip(attribute_data, multigraph_edge_keys) | |
| except (KeyError, TypeError) as err: | |
| msg = f"Invalid edge_key argument: {edge_key}" | |
| raise nx.NetworkXError(msg) from err | |
| for s, t, attrs in zip(df[source], df[target], attribute_data): | |
| if edge_key is not None: | |
| attrs, multigraph_edge_key = attrs | |
| key = g.add_edge(s, t, key=multigraph_edge_key) | |
| else: | |
| key = g.add_edge(s, t) | |
| g[s][t][key].update(zip(attr_col_headings, attrs)) | |
| else: | |
| for s, t, attrs in zip(df[source], df[target], attribute_data): | |
| g.add_edge(s, t) | |
| g[s][t].update(zip(attr_col_headings, attrs)) | |
| return g | |
| def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"): | |
| """Returns the graph adjacency matrix as a SciPy sparse array. | |
| Parameters | |
| ---------- | |
| G : graph | |
| The NetworkX graph used to construct the sparse matrix. | |
| nodelist : list, optional | |
| The rows and columns are ordered according to the nodes in `nodelist`. | |
| If `nodelist` is None, then the ordering is produced by G.nodes(). | |
| dtype : NumPy data-type, optional | |
| A valid NumPy dtype used to initialize the array. If None, then the | |
| NumPy default is used. | |
| weight : string or None optional (default='weight') | |
| The edge attribute that holds the numerical value used for | |
| the edge weight. If None then all edge weights are 1. | |
| format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'} | |
| The type of the matrix to be returned (default 'csr'). For | |
| some algorithms different implementations of sparse matrices | |
| can perform better. See [1]_ for details. | |
| Returns | |
| ------- | |
| A : SciPy sparse array | |
| Graph adjacency matrix. | |
| Notes | |
| ----- | |
| For directed graphs, matrix entry i,j corresponds to an edge from i to j. | |
| The matrix entries are populated using the edge attribute held in | |
| parameter weight. When an edge does not have that attribute, the | |
| value of the entry is 1. | |
| For multiple edges the matrix values are the sums of the edge weights. | |
| When `nodelist` does not contain every node in `G`, the adjacency matrix | |
| is built from the subgraph of `G` that is induced by the nodes in | |
| `nodelist`. | |
| The convention used for self-loop edges in graphs is to assign the | |
| diagonal matrix entry value to the weight attribute of the edge | |
| (or the number 1 if the edge has no weight attribute). If the | |
| alternate convention of doubling the edge weight is desired the | |
| resulting SciPy sparse array can be modified as follows: | |
| >>> G = nx.Graph([(1, 1)]) | |
| >>> A = nx.to_scipy_sparse_array(G) | |
| >>> print(A.todense()) | |
| [[1]] | |
| >>> A.setdiag(A.diagonal() * 2) | |
| >>> print(A.toarray()) | |
| [[2]] | |
| Examples | |
| -------- | |
| >>> G = nx.MultiDiGraph() | |
| >>> G.add_edge(0, 1, weight=2) | |
| 0 | |
| >>> G.add_edge(1, 0) | |
| 0 | |
| >>> G.add_edge(2, 2, weight=3) | |
| 0 | |
| >>> G.add_edge(2, 2) | |
| 1 | |
| >>> S = nx.to_scipy_sparse_array(G, nodelist=[0, 1, 2]) | |
| >>> print(S.toarray()) | |
| [[0 2 0] | |
| [1 0 0] | |
| [0 0 4]] | |
| References | |
| ---------- | |
| .. [1] Scipy Dev. References, "Sparse Matrices", | |
| https://docs.scipy.org/doc/scipy/reference/sparse.html | |
| """ | |
| import scipy as sp | |
| if len(G) == 0: | |
| raise nx.NetworkXError("Graph has no nodes or edges") | |
| if nodelist is None: | |
| nodelist = list(G) | |
| nlen = len(G) | |
| else: | |
| nlen = len(nodelist) | |
| if nlen == 0: | |
| raise nx.NetworkXError("nodelist has no nodes") | |
| nodeset = set(G.nbunch_iter(nodelist)) | |
| if nlen != len(nodeset): | |
| for n in nodelist: | |
| if n not in G: | |
| raise nx.NetworkXError(f"Node {n} in nodelist is not in G") | |
| raise nx.NetworkXError("nodelist contains duplicates.") | |
| if nlen < len(G): | |
| G = G.subgraph(nodelist) | |
| index = dict(zip(nodelist, range(nlen))) | |
| coefficients = zip( | |
| *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1)) | |
| ) | |
| try: | |
| row, col, data = coefficients | |
| except ValueError: | |
| # there is no edge in the subgraph | |
| row, col, data = [], [], [] | |
| if G.is_directed(): | |
| A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype) | |
| else: | |
| # symmetrize matrix | |
| d = data + data | |
| r = row + col | |
| c = col + row | |
| # selfloop entries get double counted when symmetrizing | |
| # so we subtract the data on the diagonal | |
| selfloops = list(nx.selfloop_edges(G, data=weight, default=1)) | |
| if selfloops: | |
| diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops)) | |
| d += diag_data | |
| r += diag_index | |
| c += diag_index | |
| A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype) | |
| try: | |
| return A.asformat(format) | |
| except ValueError as err: | |
| raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err | |
| def _csr_gen_triples(A): | |
| """Converts a SciPy sparse array in **Compressed Sparse Row** format to | |
| an iterable of weighted edge triples. | |
| """ | |
| nrows = A.shape[0] | |
| data, indices, indptr = A.data, A.indices, A.indptr | |
| for i in range(nrows): | |
| for j in range(indptr[i], indptr[i + 1]): | |
| yield i, int(indices[j]), data[j] | |
| def _csc_gen_triples(A): | |
| """Converts a SciPy sparse array in **Compressed Sparse Column** format to | |
| an iterable of weighted edge triples. | |
| """ | |
| ncols = A.shape[1] | |
| data, indices, indptr = A.data, A.indices, A.indptr | |
| for i in range(ncols): | |
| for j in range(indptr[i], indptr[i + 1]): | |
| yield int(indices[j]), i, data[j] | |
| def _coo_gen_triples(A): | |
| """Converts a SciPy sparse array in **Coordinate** format to an iterable | |
| of weighted edge triples. | |
| """ | |
| return ((int(i), int(j), d) for i, j, d in zip(A.row, A.col, A.data)) | |
| def _dok_gen_triples(A): | |
| """Converts a SciPy sparse array in **Dictionary of Keys** format to an | |
| iterable of weighted edge triples. | |
| """ | |
| for (r, c), v in A.items(): | |
| yield r, c, v | |
| def _generate_weighted_edges(A): | |
| """Returns an iterable over (u, v, w) triples, where u and v are adjacent | |
| vertices and w is the weight of the edge joining u and v. | |
| `A` is a SciPy sparse array (in any format). | |
| """ | |
| if A.format == "csr": | |
| return _csr_gen_triples(A) | |
| if A.format == "csc": | |
| return _csc_gen_triples(A) | |
| if A.format == "dok": | |
| return _dok_gen_triples(A) | |
| # If A is in any other format (including COO), convert it to COO format. | |
| return _coo_gen_triples(A.tocoo()) | |
| def from_scipy_sparse_array( | |
| A, parallel_edges=False, create_using=None, edge_attribute="weight" | |
| ): | |
| """Creates a new graph from an adjacency matrix given as a SciPy sparse | |
| array. | |
| Parameters | |
| ---------- | |
| A: scipy.sparse array | |
| An adjacency matrix representation of a graph | |
| parallel_edges : Boolean | |
| If this is True, `create_using` is a multigraph, and `A` is an | |
| integer matrix, then entry *(i, j)* in the matrix is interpreted as the | |
| number of parallel edges joining vertices *i* and *j* in the graph. | |
| If it is False, then the entries in the matrix are interpreted as | |
| the weight of a single edge joining the vertices. | |
| create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
| Graph type to create. If graph instance, then cleared before populated. | |
| edge_attribute: string | |
| Name of edge attribute to store matrix numeric value. The data will | |
| have the same type as the matrix entry (int, float, (real,imag)). | |
| Notes | |
| ----- | |
| For directed graphs, explicitly mention create_using=nx.DiGraph, | |
| and entry i,j of A corresponds to an edge from i to j. | |
| If `create_using` is :class:`networkx.MultiGraph` or | |
| :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the | |
| entries of `A` are of type :class:`int`, then this function returns a | |
| multigraph (constructed from `create_using`) with parallel edges. | |
| In this case, `edge_attribute` will be ignored. | |
| If `create_using` indicates an undirected multigraph, then only the edges | |
| indicated by the upper triangle of the matrix `A` will be added to the | |
| graph. | |
| Examples | |
| -------- | |
| >>> import scipy as sp | |
| >>> A = sp.sparse.eye(2, 2, 1) | |
| >>> G = nx.from_scipy_sparse_array(A) | |
| If `create_using` indicates a multigraph and the matrix has only integer | |
| entries and `parallel_edges` is False, then the entries will be treated | |
| as weights for edges joining the nodes (without creating parallel edges): | |
| >>> A = sp.sparse.csr_array([[1, 1], [1, 2]]) | |
| >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph) | |
| >>> G[1][1] | |
| AtlasView({0: {'weight': 2}}) | |
| If `create_using` indicates a multigraph and the matrix has only integer | |
| entries and `parallel_edges` is True, then the entries will be treated | |
| as the number of parallel edges joining those two vertices: | |
| >>> A = sp.sparse.csr_array([[1, 1], [1, 2]]) | |
| >>> G = nx.from_scipy_sparse_array( | |
| ... A, parallel_edges=True, create_using=nx.MultiGraph | |
| ... ) | |
| >>> G[1][1] | |
| AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) | |
| """ | |
| G = nx.empty_graph(0, create_using) | |
| n, m = A.shape | |
| if n != m: | |
| raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}") | |
| # Make sure we get even the isolated nodes of the graph. | |
| G.add_nodes_from(range(n)) | |
| # Create an iterable over (u, v, w) triples and for each triple, add an | |
| # edge from u to v with weight w. | |
| triples = _generate_weighted_edges(A) | |
| # If the entries in the adjacency matrix are integers, the graph is a | |
| # multigraph, and parallel_edges is True, then create parallel edges, each | |
| # with weight 1, for each entry in the adjacency matrix. Otherwise, create | |
| # one edge for each positive entry in the adjacency matrix and set the | |
| # weight of that edge to be the entry in the matrix. | |
| if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges: | |
| chain = itertools.chain.from_iterable | |
| # The following line is equivalent to: | |
| # | |
| # for (u, v) in edges: | |
| # for d in range(A[u, v]): | |
| # G.add_edge(u, v, weight=1) | |
| # | |
| triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples) | |
| # If we are creating an undirected multigraph, only add the edges from the | |
| # upper triangle of the matrix. Otherwise, add all the edges. This relies | |
| # on the fact that the vertices created in the | |
| # `_generated_weighted_edges()` function are actually the row/column | |
| # indices for the matrix `A`. | |
| # | |
| # Without this check, we run into a problem where each edge is added twice | |
| # when `G.add_weighted_edges_from()` is invoked below. | |
| if G.is_multigraph() and not G.is_directed(): | |
| triples = ((u, v, d) for u, v, d in triples if u <= v) | |
| G.add_weighted_edges_from(triples, weight=edge_attribute) | |
| return G | |
| # edge attrs may also be obtained from `dtype` | |
| def to_numpy_array( | |
| G, | |
| nodelist=None, | |
| dtype=None, | |
| order=None, | |
| multigraph_weight=sum, | |
| weight="weight", | |
| nonedge=0.0, | |
| ): | |
| """Returns the graph adjacency matrix as a NumPy array. | |
| Parameters | |
| ---------- | |
| G : graph | |
| The NetworkX graph used to construct the NumPy array. | |
| nodelist : list, optional | |
| The rows and columns are ordered according to the nodes in `nodelist`. | |
| If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``. | |
| dtype : NumPy data type, optional | |
| A NumPy data type used to initialize the array. If None, then the NumPy | |
| default is used. The dtype can be structured if `weight=None`, in which | |
| case the dtype field names are used to look up edge attributes. The | |
| result is a structured array where each named field in the dtype | |
| corresponds to the adjacency for that edge attribute. See examples for | |
| details. | |
| order : {'C', 'F'}, optional | |
| Whether to store multidimensional data in C- or Fortran-contiguous | |
| (row- or column-wise) order in memory. If None, then the NumPy default | |
| is used. | |
| multigraph_weight : callable, optional | |
| An function that determines how weights in multigraphs are handled. | |
| The function should accept a sequence of weights and return a single | |
| value. The default is to sum the weights of the multiple edges. | |
| weight : string or None optional (default = 'weight') | |
| The edge attribute that holds the numerical value used for | |
| the edge weight. If an edge does not have that attribute, then the | |
| value 1 is used instead. `weight` must be ``None`` if a structured | |
| dtype is used. | |
| nonedge : array_like (default = 0.0) | |
| The value used to represent non-edges in the adjacency matrix. | |
| The array values corresponding to nonedges are typically set to zero. | |
| However, this could be undesirable if there are array values | |
| corresponding to actual edges that also have the value zero. If so, | |
| one might prefer nonedges to have some other value, such as ``nan``. | |
| Returns | |
| ------- | |
| A : NumPy ndarray | |
| Graph adjacency matrix | |
| Raises | |
| ------ | |
| NetworkXError | |
| If `dtype` is a structured dtype and `G` is a multigraph | |
| ValueError | |
| If `dtype` is a structured dtype and `weight` is not `None` | |
| See Also | |
| -------- | |
| from_numpy_array | |
| Notes | |
| ----- | |
| For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``. | |
| Entries in the adjacency matrix are given by the `weight` edge attribute. | |
| When an edge does not have a weight attribute, the value of the entry is | |
| set to the number 1. For multiple (parallel) edges, the values of the | |
| entries are determined by the `multigraph_weight` parameter. The default is | |
| to sum the weight attributes for each of the parallel edges. | |
| When `nodelist` does not contain every node in `G`, the adjacency matrix is | |
| built from the subgraph of `G` that is induced by the nodes in `nodelist`. | |
| The convention used for self-loop edges in graphs is to assign the | |
| diagonal array entry value to the weight attribute of the edge | |
| (or the number 1 if the edge has no weight attribute). If the | |
| alternate convention of doubling the edge weight is desired the | |
| resulting NumPy array can be modified as follows: | |
| >>> import numpy as np | |
| >>> G = nx.Graph([(1, 1)]) | |
| >>> A = nx.to_numpy_array(G) | |
| >>> A | |
| array([[1.]]) | |
| >>> A[np.diag_indices_from(A)] *= 2 | |
| >>> A | |
| array([[2.]]) | |
| Examples | |
| -------- | |
| >>> G = nx.MultiDiGraph() | |
| >>> G.add_edge(0, 1, weight=2) | |
| 0 | |
| >>> G.add_edge(1, 0) | |
| 0 | |
| >>> G.add_edge(2, 2, weight=3) | |
| 0 | |
| >>> G.add_edge(2, 2) | |
| 1 | |
| >>> nx.to_numpy_array(G, nodelist=[0, 1, 2]) | |
| array([[0., 2., 0.], | |
| [1., 0., 0.], | |
| [0., 0., 4.]]) | |
| When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist` | |
| and their edges are not included in the adjacency matrix. Here is an example: | |
| >>> G = nx.Graph() | |
| >>> G.add_edge(3, 1) | |
| >>> G.add_edge(2, 0) | |
| >>> G.add_edge(2, 1) | |
| >>> G.add_edge(3, 0) | |
| >>> nx.to_numpy_array(G, nodelist=[1, 2, 3]) | |
| array([[0., 1., 1.], | |
| [1., 0., 0.], | |
| [1., 0., 0.]]) | |
| This function can also be used to create adjacency matrices for multiple | |
| edge attributes with structured dtypes: | |
| >>> G = nx.Graph() | |
| >>> G.add_edge(0, 1, weight=10) | |
| >>> G.add_edge(1, 2, cost=5) | |
| >>> G.add_edge(2, 3, weight=3, cost=-4.0) | |
| >>> dtype = np.dtype([("weight", int), ("cost", float)]) | |
| >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None) | |
| >>> A["weight"] | |
| array([[ 0, 10, 0, 0], | |
| [10, 0, 1, 0], | |
| [ 0, 1, 0, 3], | |
| [ 0, 0, 3, 0]]) | |
| >>> A["cost"] | |
| array([[ 0., 1., 0., 0.], | |
| [ 1., 0., 5., 0.], | |
| [ 0., 5., 0., -4.], | |
| [ 0., 0., -4., 0.]]) | |
| As stated above, the argument "nonedge" is useful especially when there are | |
| actually edges with weight 0 in the graph. Setting a nonedge value different than 0, | |
| makes it much clearer to differentiate such 0-weighted edges and actual nonedge values. | |
| >>> G = nx.Graph() | |
| >>> G.add_edge(3, 1, weight=2) | |
| >>> G.add_edge(2, 0, weight=0) | |
| >>> G.add_edge(2, 1, weight=0) | |
| >>> G.add_edge(3, 0, weight=1) | |
| >>> nx.to_numpy_array(G, nonedge=-1.) | |
| array([[-1., 2., -1., 1.], | |
| [ 2., -1., 0., -1.], | |
| [-1., 0., -1., 0.], | |
| [ 1., -1., 0., -1.]]) | |
| """ | |
| import numpy as np | |
| if nodelist is None: | |
| nodelist = list(G) | |
| nlen = len(nodelist) | |
| # Input validation | |
| nodeset = set(nodelist) | |
| if nodeset - set(G): | |
| raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G") | |
| if len(nodeset) < nlen: | |
| raise nx.NetworkXError("nodelist contains duplicates.") | |
| A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order) | |
| # Corner cases: empty nodelist or graph without any edges | |
| if nlen == 0 or G.number_of_edges() == 0: | |
| return A | |
| # If dtype is structured and weight is None, use dtype field names as | |
| # edge attributes | |
| edge_attrs = None # Only single edge attribute by default | |
| if A.dtype.names: | |
| if weight is None: | |
| edge_attrs = dtype.names | |
| else: | |
| raise ValueError( | |
| "Specifying `weight` not supported for structured dtypes\n." | |
| "To create adjacency matrices from structured dtypes, use `weight=None`." | |
| ) | |
| # Map nodes to row/col in matrix | |
| idx = dict(zip(nodelist, range(nlen))) | |
| if len(nodelist) < len(G): | |
| G = G.subgraph(nodelist).copy() | |
| # Collect all edge weights and reduce with `multigraph_weights` | |
| if G.is_multigraph(): | |
| if edge_attrs: | |
| raise nx.NetworkXError( | |
| "Structured arrays are not supported for MultiGraphs" | |
| ) | |
| d = defaultdict(list) | |
| for u, v, wt in G.edges(data=weight, default=1.0): | |
| d[(idx[u], idx[v])].append(wt) | |
| i, j = np.array(list(d.keys())).T # indices | |
| wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights | |
| else: | |
| i, j, wts = [], [], [] | |
| # Special branch: multi-attr adjacency from structured dtypes | |
| if edge_attrs: | |
| # Extract edges with all data | |
| for u, v, data in G.edges(data=True): | |
| i.append(idx[u]) | |
| j.append(idx[v]) | |
| wts.append(data) | |
| # Map each attribute to the appropriate named field in the | |
| # structured dtype | |
| for attr in edge_attrs: | |
| attr_data = [wt.get(attr, 1.0) for wt in wts] | |
| A[attr][i, j] = attr_data | |
| if not G.is_directed(): | |
| A[attr][j, i] = attr_data | |
| return A | |
| for u, v, wt in G.edges(data=weight, default=1.0): | |
| i.append(idx[u]) | |
| j.append(idx[v]) | |
| wts.append(wt) | |
| # Set array values with advanced indexing | |
| A[i, j] = wts | |
| if not G.is_directed(): | |
| A[j, i] = wts | |
| return A | |
| def from_numpy_array(A, parallel_edges=False, create_using=None, edge_attr="weight"): | |
| """Returns a graph from a 2D NumPy array. | |
| The 2D NumPy array is interpreted as an adjacency matrix for the graph. | |
| Parameters | |
| ---------- | |
| A : a 2D numpy.ndarray | |
| An adjacency matrix representation of a graph | |
| parallel_edges : Boolean | |
| If this is True, `create_using` is a multigraph, and `A` is an | |
| integer array, then entry *(i, j)* in the array is interpreted as the | |
| number of parallel edges joining vertices *i* and *j* in the graph. | |
| If it is False, then the entries in the array are interpreted as | |
| the weight of a single edge joining the vertices. | |
| create_using : NetworkX graph constructor, optional (default=nx.Graph) | |
| Graph type to create. If graph instance, then cleared before populated. | |
| edge_attr : String, optional (default="weight") | |
| The attribute to which the array values are assigned on each edge. If | |
| it is None, edge attributes will not be assigned. | |
| Notes | |
| ----- | |
| For directed graphs, explicitly mention create_using=nx.DiGraph, | |
| and entry i,j of A corresponds to an edge from i to j. | |
| If `create_using` is :class:`networkx.MultiGraph` or | |
| :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the | |
| entries of `A` are of type :class:`int`, then this function returns a | |
| multigraph (of the same type as `create_using`) with parallel edges. | |
| If `create_using` indicates an undirected multigraph, then only the edges | |
| indicated by the upper triangle of the array `A` will be added to the | |
| graph. | |
| If `edge_attr` is Falsy (False or None), edge attributes will not be | |
| assigned, and the array data will be treated like a binary mask of | |
| edge presence or absence. Otherwise, the attributes will be assigned | |
| as follows: | |
| If the NumPy array has a single data type for each array entry it | |
| will be converted to an appropriate Python data type. | |
| If the NumPy array has a user-specified compound data type the names | |
| of the data fields will be used as attribute keys in the resulting | |
| NetworkX graph. | |
| See Also | |
| -------- | |
| to_numpy_array | |
| Examples | |
| -------- | |
| Simple integer weights on edges: | |
| >>> import numpy as np | |
| >>> A = np.array([[1, 1], [2, 1]]) | |
| >>> G = nx.from_numpy_array(A) | |
| >>> G.edges(data=True) | |
| EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})]) | |
| If `create_using` indicates a multigraph and the array has only integer | |
| entries and `parallel_edges` is False, then the entries will be treated | |
| as weights for edges joining the nodes (without creating parallel edges): | |
| >>> A = np.array([[1, 1], [1, 2]]) | |
| >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph) | |
| >>> G[1][1] | |
| AtlasView({0: {'weight': 2}}) | |
| If `create_using` indicates a multigraph and the array has only integer | |
| entries and `parallel_edges` is True, then the entries will be treated | |
| as the number of parallel edges joining those two vertices: | |
| >>> A = np.array([[1, 1], [1, 2]]) | |
| >>> temp = nx.MultiGraph() | |
| >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp) | |
| >>> G[1][1] | |
| AtlasView({0: {'weight': 1}, 1: {'weight': 1}}) | |
| User defined compound data type on edges: | |
| >>> dt = [("weight", float), ("cost", int)] | |
| >>> A = np.array([[(1.0, 2)]], dtype=dt) | |
| >>> G = nx.from_numpy_array(A) | |
| >>> G.edges() | |
| EdgeView([(0, 0)]) | |
| >>> G[0][0]["cost"] | |
| 2 | |
| >>> G[0][0]["weight"] | |
| 1.0 | |
| """ | |
| kind_to_python_type = { | |
| "f": float, | |
| "i": int, | |
| "u": int, | |
| "b": bool, | |
| "c": complex, | |
| "S": str, | |
| "U": str, | |
| "V": "void", | |
| } | |
| G = nx.empty_graph(0, create_using) | |
| if A.ndim != 2: | |
| raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}") | |
| n, m = A.shape | |
| if n != m: | |
| raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}") | |
| dt = A.dtype | |
| try: | |
| python_type = kind_to_python_type[dt.kind] | |
| except Exception as err: | |
| raise TypeError(f"Unknown numpy data type: {dt}") from err | |
| # Make sure we get even the isolated nodes of the graph. | |
| G.add_nodes_from(range(n)) | |
| # Get a list of all the entries in the array with nonzero entries. These | |
| # coordinates become edges in the graph. (convert to int from np.int64) | |
| edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero())) | |
| # handle numpy constructed data type | |
| if python_type == "void": | |
| # Sort the fields by their offset, then by dtype, then by name. | |
| fields = sorted( | |
| (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items() | |
| ) | |
| triples = ( | |
| ( | |
| u, | |
| v, | |
| {} | |
| if edge_attr in [False, None] | |
| else { | |
| name: kind_to_python_type[dtype.kind](val) | |
| for (_, dtype, name), val in zip(fields, A[u, v]) | |
| }, | |
| ) | |
| for u, v in edges | |
| ) | |
| # If the entries in the adjacency matrix are integers, the graph is a | |
| # multigraph, and parallel_edges is True, then create parallel edges, each | |
| # with weight 1, for each entry in the adjacency matrix. Otherwise, create | |
| # one edge for each positive entry in the adjacency matrix and set the | |
| # weight of that edge to be the entry in the matrix. | |
| elif python_type is int and G.is_multigraph() and parallel_edges: | |
| chain = itertools.chain.from_iterable | |
| # The following line is equivalent to: | |
| # | |
| # for (u, v) in edges: | |
| # for d in range(A[u, v]): | |
| # G.add_edge(u, v, weight=1) | |
| # | |
| if edge_attr in [False, None]: | |
| triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges) | |
| else: | |
| triples = chain( | |
| ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges | |
| ) | |
| else: # basic data type | |
| if edge_attr in [False, None]: | |
| triples = ((u, v, {}) for u, v in edges) | |
| else: | |
| triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges) | |
| # If we are creating an undirected multigraph, only add the edges from the | |
| # upper triangle of the matrix. Otherwise, add all the edges. This relies | |
| # on the fact that the vertices created in the | |
| # `_generated_weighted_edges()` function are actually the row/column | |
| # indices for the matrix `A`. | |
| # | |
| # Without this check, we run into a problem where each edge is added twice | |
| # when `G.add_edges_from()` is invoked below. | |
| if G.is_multigraph() and not G.is_directed(): | |
| triples = ((u, v, d) for u, v, d in triples if u <= v) | |
| G.add_edges_from(triples) | |
| return G | |