Eyob-Sol's picture
Upload 24 files
dbc8c36 verified
Raw
History Blame Contribute Delete
8.15 kB
"""
Minimal Collatz subtree construction up to N, using the structural
branch algorithm from the paper.
This file implements:
- Algorithm 5: GenerateCollatzBranches
- Algorithm 6: BuildCollatzSubtreeEdges
The combined effect is to construct the minimal Collatz subtree
(containing all natural numbers 1..N, plus necessary ancestors)
as a directed graph with edges (child -> parent) following the
forward Collatz map:
T(n) = n/2 if n is even
3n + 1 if n is odd
Edges are of the form (n, T(n)), including the structural edge (1, 4).
"""
from __future__ import annotations
from typing import Dict, List, Tuple
import pandas as pd
# ============================================================
# Algorithm 5: GenerateCollatzBranches
# ============================================================
def generate_collatz_branches(N: int) -> Dict[int, List[int]]:
"""
Implement Algorithm 5: GenerateCollatzBranches.
Parameters
----------
N : int
Natural number N (upper bound for starting odd numbers).
Returns
-------
Dict[int, List[int]]
Dictionary `Tree` mapping each odd root y to an increasing list
of values of the form y * 2^m. Initially, each odd x ≤ N is mapped
to its doubling sequence up to N, then extended as needed when
exploring Collatz branches from all odd x in [3, N].
Notes
-----
Structural summary:
1) For each odd x ≤ N, create the base branch:
Tree[x] = [x, 2x, 4x, ..., 2^k x] (all ≤ N).
2) For each odd x from 3 to N, repeatedly perform:
b = 3x + 1
m = b & (-b) # largest power of 2 dividing b
y = b // m # odd part of b
and extend the branch for y so that it contains b (and any
intermediate doublings) if needed. Then set x <- y and repeat
until x = 1 or we break due to an already-covered b.
"""
if N < 1:
raise ValueError("N must be a positive integer.")
Tree: Dict[int, List[int]] = {}
# ------------------------------------------------------------
# Lines 2–7: For all odd numbers x ≤ N, initialize base branches
# ------------------------------------------------------------
for x in range(1, N + 1, 2): # odd x: 1, 3, 5, ...
seq: List[int] = []
power = 1 # corresponds to 2^0, 2^1, 2^2, ...
while x * power <= N:
seq.append(x * power)
power *= 2
Tree[x] = seq
# ------------------------------------------------------------
# Lines 8–33: Extend branches using the 3x+1 structure
# ------------------------------------------------------------
for start_x in range(3, N + 1, 2): # x from 3 to N, odd
x = start_x
# while x ≠ 1 do
while x != 1:
# b ← 3x + 1
b = 3 * x + 1
# m ← b ∧ (−b) (least significant 1 bit, largest power of 2 dividing b)
m = b & -b
# y ← b / m (odd part)
y = b // m
# if y ∈ Tree then
if y in Tree:
# if b ∉ Tree[y] then
if b not in Tree[y]:
# power ← 2^{len(Tree[y])}
power = 1 << len(Tree[y])
# while y · power ≤ b do
while y * power <= b:
# Append y · power to Tree[y]
Tree[y].append(y * power)
# power ← power · 2
power *= 2
else:
# else break
break
else:
# else:
# power ← 1, seq ← []
power = 1
seq: List[int] = []
# while y · power ≤ b do
while y * power <= b:
# Append y · power to seq
seq.append(y * power)
# power ← power · 2
power *= 2
# Tree[y] ← seq
Tree[y] = seq
# x ← y
x = y
# Line 34: return Tree
return Tree
# ============================================================
# Algorithm 6: BuildCollatzSubtreeEdges
# ============================================================
def build_collatz_subtree_edges(N: int, Tree: Dict[int, List[int]]) -> pd.DataFrame:
"""
Implement Algorithm 6: BuildCollatzSubtreeEdges.
Parameters
----------
N : int
Upper bound N (not explicitly used inside, but kept for interface
consistency with the paper).
Tree : Dict[int, List[int]]
Dictionary produced by generate_collatz_branches(N).
Returns
-------
pd.DataFrame
DataFrame with columns ["Source", "Target"], encoding directed
edges (child -> parent) of a connected minimal Collatz subtree
containing all integers up to N.
The pseudocode is followed line by line:
1) For each branch list S in Tree, add edges S[i] -> S[i-1].
2) For each odd root x > 1, compute:
b = 3x + 1
m = b & (-b)
y = b / m
find the index t of b in Tree[y], and add edge:
Tree[x][0] -> Tree[y][t]
which corresponds structurally to x -> 3x + 1.
3) Finally, add the structural edge (1, 4).
"""
edges: List[Tuple[int, int]] = []
# ------------------------------------------------------------
# Lines 2–6: For all branches (r, S) in Tree
# ------------------------------------------------------------
for _, S in Tree.items():
# S = [r, 2r, 4r, ...]; add edges S[i] -> S[i-1] for i = 1..|S|-1
for i in range(1, len(S)):
child = S[i]
parent = S[i - 1]
edges.append((child, parent))
# ------------------------------------------------------------
# Lines 7–13: Link branches via the 3x+1 structure
# ------------------------------------------------------------
for x in sorted(Tree.keys()):
if x > 1: # only odd roots greater than 1
# b = 3x + 1
b = 3 * x + 1
# m = b ∧ (-b)
m = b & -b
# y = b / m
y = b // m
# Let i ← index of b in Tree[y]
if y not in Tree:
# If Tree[y] does not exist, structure is inconsistent; skip.
continue
try:
t = Tree[y].index(b)
except ValueError:
# If b is not present in Tree[y], skip this link.
continue
# Add edge (Tree[x][0], Tree[y][i]) to edges
root_x = Tree[x][0] # odd root x
match_y = Tree[y][t] # this is b itself
edges.append((root_x, match_y))
# ------------------------------------------------------------
# Line 14: Add edge (1, 4)
# ------------------------------------------------------------
edges.append((1, 4))
# ------------------------------------------------------------
# Line 15: return edges as DataFrame
# ------------------------------------------------------------
df = pd.DataFrame(edges, columns=["Source", "Target"]).drop_duplicates()
return df
# ============================================================
# Convenience wrapper: minimal subtree edges up to N
# ============================================================
def generate_minimal_collatz_subtree_edges(N: int) -> pd.DataFrame:
"""
High-level helper that runs Algorithm 5 and Algorithm 6:
Tree = GenerateCollatzBranches(N)
edges = BuildCollatzSubtreeEdges(N, Tree)
and returns the resulting edge DataFrame.
Parameters
----------
N : int
Upper bound on the natural numbers to be included (1..N).
Returns
-------
pd.DataFrame
Edge list with columns ["Source", "Target"].
"""
Tree = generate_collatz_branches(N)
df_edges = build_collatz_subtree_edges(N, Tree)
return df_edges