|
|
"""Functions for generating trees.""" |
|
|
from collections import defaultdict |
|
|
|
|
|
import networkx as nx |
|
|
from networkx.utils import py_random_state |
|
|
|
|
|
__all__ = ["prefix_tree", "random_tree", "prefix_tree_recursive"] |
|
|
|
|
|
|
|
|
def prefix_tree(paths): |
|
|
"""Creates a directed prefix tree from a list of paths. |
|
|
|
|
|
Usually the paths are described as strings or lists of integers. |
|
|
|
|
|
A "prefix tree" represents the prefix structure of the strings. |
|
|
Each node represents a prefix of some string. The root represents |
|
|
the empty prefix with children for the single letter prefixes which |
|
|
in turn have children for each double letter prefix starting with |
|
|
the single letter corresponding to the parent node, and so on. |
|
|
|
|
|
More generally the prefixes do not need to be strings. A prefix refers |
|
|
to the start of a sequence. The root has children for each one element |
|
|
prefix and they have children for each two element prefix that starts |
|
|
with the one element sequence of the parent, and so on. |
|
|
|
|
|
Note that this implementation uses integer nodes with an attribute. |
|
|
Each node has an attribute "source" whose value is the original element |
|
|
of the path to which this node corresponds. For example, suppose `paths` |
|
|
consists of one path: "can". Then the nodes `[1, 2, 3]` which represent |
|
|
this path have "source" values "c", "a" and "n". |
|
|
|
|
|
All the descendants of a node have a common prefix in the sequence/path |
|
|
associated with that node. From the returned tree, the prefix for each |
|
|
node can be constructed by traversing the tree up to the root and |
|
|
accumulating the "source" values along the way. |
|
|
|
|
|
The root node is always `0` and has "source" attribute `None`. |
|
|
The root is the only node with in-degree zero. |
|
|
The nil node is always `-1` and has "source" attribute `"NIL"`. |
|
|
The nil node is the only node with out-degree zero. |
|
|
|
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
paths: iterable of paths |
|
|
An iterable of paths which are themselves sequences. |
|
|
Matching prefixes among these sequences are identified with |
|
|
nodes of the prefix tree. One leaf of the tree is associated |
|
|
with each path. (Identical paths are associated with the same |
|
|
leaf of the tree.) |
|
|
|
|
|
|
|
|
Returns |
|
|
------- |
|
|
tree: DiGraph |
|
|
A directed graph representing an arborescence consisting of the |
|
|
prefix tree generated by `paths`. Nodes are directed "downward", |
|
|
from parent to child. A special "synthetic" root node is added |
|
|
to be the parent of the first node in each path. A special |
|
|
"synthetic" leaf node, the "nil" node `-1`, is added to be the child |
|
|
of all nodes representing the last element in a path. (The |
|
|
addition of this nil node technically makes this not an |
|
|
arborescence but a directed acyclic graph; removing the nil node |
|
|
makes it an arborescence.) |
|
|
|
|
|
|
|
|
Notes |
|
|
----- |
|
|
The prefix tree is also known as a *trie*. |
|
|
|
|
|
|
|
|
Examples |
|
|
-------- |
|
|
Create a prefix tree from a list of strings with common prefixes:: |
|
|
|
|
|
>>> paths = ["ab", "abs", "ad"] |
|
|
>>> T = nx.prefix_tree(paths) |
|
|
>>> list(T.edges) |
|
|
[(0, 1), (1, 2), (1, 4), (2, -1), (2, 3), (3, -1), (4, -1)] |
|
|
|
|
|
The leaf nodes can be obtained as predecessors of the nil node:: |
|
|
|
|
|
>>> root, NIL = 0, -1 |
|
|
>>> list(T.predecessors(NIL)) |
|
|
[2, 3, 4] |
|
|
|
|
|
To recover the original paths that generated the prefix tree, |
|
|
traverse up the tree from the node `-1` to the node `0`:: |
|
|
|
|
|
>>> recovered = [] |
|
|
>>> for v in T.predecessors(NIL): |
|
|
... prefix = "" |
|
|
... while v != root: |
|
|
... prefix = str(T.nodes[v]["source"]) + prefix |
|
|
... v = next(T.predecessors(v)) # only one predecessor |
|
|
... recovered.append(prefix) |
|
|
>>> sorted(recovered) |
|
|
['ab', 'abs', 'ad'] |
|
|
""" |
|
|
|
|
|
def get_children(parent, paths): |
|
|
children = defaultdict(list) |
|
|
|
|
|
|
|
|
for path in paths: |
|
|
|
|
|
if not path: |
|
|
tree.add_edge(parent, NIL) |
|
|
continue |
|
|
child, *rest = path |
|
|
|
|
|
children[child].append(rest) |
|
|
return children |
|
|
|
|
|
|
|
|
tree = nx.DiGraph() |
|
|
root = 0 |
|
|
tree.add_node(root, source=None) |
|
|
NIL = -1 |
|
|
tree.add_node(NIL, source="NIL") |
|
|
children = get_children(root, paths) |
|
|
stack = [(root, iter(children.items()))] |
|
|
while stack: |
|
|
parent, remaining_children = stack[-1] |
|
|
try: |
|
|
child, remaining_paths = next(remaining_children) |
|
|
|
|
|
except StopIteration: |
|
|
stack.pop() |
|
|
continue |
|
|
|
|
|
new_name = len(tree) - 1 |
|
|
|
|
|
tree.add_node(new_name, source=child) |
|
|
tree.add_edge(parent, new_name) |
|
|
children = get_children(new_name, remaining_paths) |
|
|
stack.append((new_name, iter(children.items()))) |
|
|
|
|
|
return tree |
|
|
|
|
|
|
|
|
def prefix_tree_recursive(paths): |
|
|
"""Recursively creates a directed prefix tree from a list of paths. |
|
|
|
|
|
The original recursive version of prefix_tree for comparison. It is |
|
|
the same algorithm but the recursion is unrolled onto a stack. |
|
|
|
|
|
Usually the paths are described as strings or lists of integers. |
|
|
|
|
|
A "prefix tree" represents the prefix structure of the strings. |
|
|
Each node represents a prefix of some string. The root represents |
|
|
the empty prefix with children for the single letter prefixes which |
|
|
in turn have children for each double letter prefix starting with |
|
|
the single letter corresponding to the parent node, and so on. |
|
|
|
|
|
More generally the prefixes do not need to be strings. A prefix refers |
|
|
to the start of a sequence. The root has children for each one element |
|
|
prefix and they have children for each two element prefix that starts |
|
|
with the one element sequence of the parent, and so on. |
|
|
|
|
|
Note that this implementation uses integer nodes with an attribute. |
|
|
Each node has an attribute "source" whose value is the original element |
|
|
of the path to which this node corresponds. For example, suppose `paths` |
|
|
consists of one path: "can". Then the nodes `[1, 2, 3]` which represent |
|
|
this path have "source" values "c", "a" and "n". |
|
|
|
|
|
All the descendants of a node have a common prefix in the sequence/path |
|
|
associated with that node. From the returned tree, ehe prefix for each |
|
|
node can be constructed by traversing the tree up to the root and |
|
|
accumulating the "source" values along the way. |
|
|
|
|
|
The root node is always `0` and has "source" attribute `None`. |
|
|
The root is the only node with in-degree zero. |
|
|
The nil node is always `-1` and has "source" attribute `"NIL"`. |
|
|
The nil node is the only node with out-degree zero. |
|
|
|
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
paths: iterable of paths |
|
|
An iterable of paths which are themselves sequences. |
|
|
Matching prefixes among these sequences are identified with |
|
|
nodes of the prefix tree. One leaf of the tree is associated |
|
|
with each path. (Identical paths are associated with the same |
|
|
leaf of the tree.) |
|
|
|
|
|
|
|
|
Returns |
|
|
------- |
|
|
tree: DiGraph |
|
|
A directed graph representing an arborescence consisting of the |
|
|
prefix tree generated by `paths`. Nodes are directed "downward", |
|
|
from parent to child. A special "synthetic" root node is added |
|
|
to be the parent of the first node in each path. A special |
|
|
"synthetic" leaf node, the "nil" node `-1`, is added to be the child |
|
|
of all nodes representing the last element in a path. (The |
|
|
addition of this nil node technically makes this not an |
|
|
arborescence but a directed acyclic graph; removing the nil node |
|
|
makes it an arborescence.) |
|
|
|
|
|
|
|
|
Notes |
|
|
----- |
|
|
The prefix tree is also known as a *trie*. |
|
|
|
|
|
|
|
|
Examples |
|
|
-------- |
|
|
Create a prefix tree from a list of strings with common prefixes:: |
|
|
|
|
|
>>> paths = ["ab", "abs", "ad"] |
|
|
>>> T = nx.prefix_tree(paths) |
|
|
>>> list(T.edges) |
|
|
[(0, 1), (1, 2), (1, 4), (2, -1), (2, 3), (3, -1), (4, -1)] |
|
|
|
|
|
The leaf nodes can be obtained as predecessors of the nil node. |
|
|
|
|
|
>>> root, NIL = 0, -1 |
|
|
>>> list(T.predecessors(NIL)) |
|
|
[2, 3, 4] |
|
|
|
|
|
To recover the original paths that generated the prefix tree, |
|
|
traverse up the tree from the node `-1` to the node `0`:: |
|
|
|
|
|
>>> recovered = [] |
|
|
>>> for v in T.predecessors(NIL): |
|
|
... prefix = "" |
|
|
... while v != root: |
|
|
... prefix = str(T.nodes[v]["source"]) + prefix |
|
|
... v = next(T.predecessors(v)) # only one predecessor |
|
|
... recovered.append(prefix) |
|
|
>>> sorted(recovered) |
|
|
['ab', 'abs', 'ad'] |
|
|
""" |
|
|
|
|
|
def _helper(paths, root, tree): |
|
|
"""Recursively create a trie from the given list of paths. |
|
|
|
|
|
`paths` is a list of paths, each of which is itself a list of |
|
|
nodes, relative to the given `root` (but not including it). This |
|
|
list of paths will be interpreted as a tree-like structure, in |
|
|
which two paths that share a prefix represent two branches of |
|
|
the tree with the same initial segment. |
|
|
|
|
|
`root` is the parent of the node at index 0 in each path. |
|
|
|
|
|
`tree` is the "accumulator", the :class:`networkx.DiGraph` |
|
|
representing the branching to which the new nodes and edges will |
|
|
be added. |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
children = defaultdict(list) |
|
|
for path in paths: |
|
|
|
|
|
if not path: |
|
|
tree.add_edge(root, NIL) |
|
|
continue |
|
|
child, *rest = path |
|
|
|
|
|
children[child].append(rest) |
|
|
|
|
|
for child, remaining_paths in children.items(): |
|
|
|
|
|
new_name = len(tree) - 1 |
|
|
|
|
|
tree.add_node(new_name, source=child) |
|
|
tree.add_edge(root, new_name) |
|
|
_helper(remaining_paths, new_name, tree) |
|
|
|
|
|
|
|
|
tree = nx.DiGraph() |
|
|
root = 0 |
|
|
tree.add_node(root, source=None) |
|
|
NIL = -1 |
|
|
tree.add_node(NIL, source="NIL") |
|
|
|
|
|
_helper(paths, root, tree) |
|
|
return tree |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@py_random_state(1) |
|
|
def random_tree(n, seed=None, create_using=None): |
|
|
"""Returns a uniformly random tree on `n` nodes. |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
n : int |
|
|
A positive integer representing the number of nodes in the tree. |
|
|
seed : integer, random_state, or None (default) |
|
|
Indicator of random number generation state. |
|
|
See :ref:`Randomness<randomness>`. |
|
|
create_using : NetworkX graph constructor, optional (default=nx.Graph) |
|
|
Graph type to create. If graph instance, then cleared before populated. |
|
|
|
|
|
Returns |
|
|
------- |
|
|
NetworkX graph |
|
|
A tree, given as an undirected graph, whose nodes are numbers in |
|
|
the set {0, …, *n* - 1}. |
|
|
|
|
|
Raises |
|
|
------ |
|
|
NetworkXPointlessConcept |
|
|
If `n` is zero (because the null graph is not a tree). |
|
|
|
|
|
Notes |
|
|
----- |
|
|
The current implementation of this function generates a uniformly |
|
|
random Prüfer sequence then converts that to a tree via the |
|
|
:func:`~networkx.from_prufer_sequence` function. Since there is a |
|
|
bijection between Prüfer sequences of length *n* - 2 and trees on |
|
|
*n* nodes, the tree is chosen uniformly at random from the set of |
|
|
all trees on *n* nodes. |
|
|
|
|
|
Examples |
|
|
-------- |
|
|
>>> tree = nx.random_tree(n=10, seed=0) |
|
|
>>> print(nx.forest_str(tree, sources=[0])) |
|
|
╙── 0 |
|
|
├── 3 |
|
|
└── 4 |
|
|
├── 6 |
|
|
│  ├── 1 |
|
|
│  ├── 2 |
|
|
│  └── 7 |
|
|
│  └── 8 |
|
|
│  └── 5 |
|
|
└── 9 |
|
|
|
|
|
>>> tree = nx.random_tree(n=10, seed=0, create_using=nx.DiGraph) |
|
|
>>> print(nx.forest_str(tree)) |
|
|
╙── 0 |
|
|
├─╼ 3 |
|
|
└─╼ 4 |
|
|
├─╼ 6 |
|
|
│  ├─╼ 1 |
|
|
│  ├─╼ 2 |
|
|
│  └─╼ 7 |
|
|
│  └─╼ 8 |
|
|
│  └─╼ 5 |
|
|
└─╼ 9 |
|
|
""" |
|
|
if n == 0: |
|
|
raise nx.NetworkXPointlessConcept("the null graph is not a tree") |
|
|
|
|
|
if n == 1: |
|
|
utree = nx.empty_graph(1, create_using) |
|
|
else: |
|
|
sequence = [seed.choice(range(n)) for i in range(n - 2)] |
|
|
utree = nx.from_prufer_sequence(sequence) |
|
|
|
|
|
if create_using is None: |
|
|
tree = utree |
|
|
else: |
|
|
tree = nx.empty_graph(0, create_using) |
|
|
if tree.is_directed(): |
|
|
|
|
|
edges = nx.dfs_edges(utree, source=0) |
|
|
else: |
|
|
edges = utree.edges |
|
|
|
|
|
|
|
|
tree.add_nodes_from(utree.nodes) |
|
|
tree.add_edges_from(edges) |
|
|
|
|
|
return tree |
|
|
|