carbon-demo / scripts /render_tree_mockups_v2.py
tfrere's picture
tfrere HF Staff
Add §7 species tree, slim down §6 UMAP, mount /experiments
1f90847
raw
history blame
15.8 kB
"""Round 2 of species tree mockups, focusing on visual quality.
D. tree + alignment tracks (iTOL / Nature 2025 style)
E. editorial Hillis (rounded thick branches, airy)
F. radial with kingdom arcs as background bands
"""
import json
import os
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.path import Path
from matplotlib import font_manager
import numpy as np
from scipy.cluster.hierarchy import dendrogram, leaves_list
# Directory that contains this script.
HERE = os.path.dirname(os.path.abspath(__file__))
# Input data lives in the "data" directory that sits beside this script's
# directory (one level up from HERE, then into "data").
DATA = os.path.join(os.path.dirname(HERE), "data")
# Rendered mockups are written to <DATA>/mockups.
OUT = os.path.join(DATA, "mockups")
# Created at import time; exist_ok=True makes repeated runs a no-op.
os.makedirs(OUT, exist_ok=True)
# Base palette shared by all mockups in this file.
PAPER = "#fbfaf6"  # background of axes, figures and saved images
INK = "#1f1f1d"  # primary text, branches, and fallback label colour
MUTED = "#888888"  # secondary text: subtitles, axis labels, counts, scale bar
SOFT = "#bbb8ad"  # dendrogram link colour in the tree-with-tracks mockup
GRID = "#e5e3da"  # grid lines and bottom spine of the tree axis
# Less saturated kingdom palette (Krzywinski-style: muted, only one accent)
# Keys are the kingdom names looked up per species; used for label text,
# chips and legend markers.
KINGDOM_COLOR = {
    "vertebrates": "#1f1f1d", # ink (the "main" cohort)
    "invertebrates": "#7a6242",
    "plants": "#317f3f",
    "fungi": "#a9762f",
    "bacteria": "#b00020",
    "viruses": "#2c5aa0",
}
# Very pale background tints for kingdom bands
# Same keys as KINGDOM_COLOR; used to fill the wedges of the radial mockup.
KINGDOM_BG = {
    "vertebrates": "#ecebe5",
    "invertebrates": "#ece4d6",
    "plants": "#e3eee2",
    "fungi": "#f1e7d2",
    "bacteria": "#f3dcd8",
    "viruses": "#dde5f0",
}
def setup_font():
    """Select the first available monospace family from a preference list.

    Candidates are tried in order. A candidate counts as available when its
    name occurs as a substring of any font name known to matplotlib's font
    manager. When nothing matches, ``font.family`` is left as it was.
    """
    preferred = ("JetBrains Mono", "Menlo", "Monaco", "DejaVu Sans Mono")
    known_names = [font.name for font in font_manager.fontManager.ttflist]
    for family in preferred:
        if any(family in known for known in known_names):
            plt.rcParams["font.family"] = family
            break
# Pick the font first, then give axes, figures and saved images the same
# paper-coloured background.
setup_font()
plt.rcParams.update({
    "axes.facecolor": PAPER,
    "figure.facecolor": PAPER,
    "savefig.facecolor": PAPER,
})
# Fixed ordering of ten vertebrate species names.
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably a reference ordering for comparison with the clustered tree —
# confirm before relying on it or removing it.
NCBI_VERTEBRATE_ORDER = [
    "human", "macaque", "mouse", "rat", "dog", "cow", "pig",
    "chicken", "frog", "zebrafish",
]
def load_tree():
    """Load the species-tree payload from ``species_tree.json`` in ``DATA``.

    Returns:
        The decoded JSON document. The renderers in this file read the keys
        ``species``, ``kingdom``, ``counts``, ``linkage_ward``,
        ``n_total_points`` and ``dim`` from it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; being explicit keeps the result
    # independent of the platform's default locale encoding.
    with open(os.path.join(DATA, "species_tree.json"), encoding="utf-8") as f:
        return json.load(f)
# ----------------------------------------------------------------------
# D. tree + alignment tracks (iTOL / Nature)
# ----------------------------------------------------------------------
def render_tree_with_tracks(tree, path):
    """Render mockup D: a rectangular dendrogram with aligned per-leaf tracks.

    The figure has four columns that share one y-axis: the tree itself, the
    species label, a kingdom colour chip and a log-scaled sequence-count bar.

    Args:
        tree: Mapping with the keys ``species``, ``kingdom``, ``counts``,
            ``linkage_ward``, ``n_total_points`` and ``dim``.
        path: Destination of the saved image.
    """
    names = tree["species"]
    kingdom_of = dict(zip(names, tree["kingdom"]))
    count_of = dict(zip(names, tree["counts"]))
    linkage = np.array(tree["linkage_ward"])

    # A plot-free pass gives the leaf order needed to align the side tracks.
    ordered = dendrogram(linkage, no_plot=True, labels=names)["ivl"]
    n_leaves = len(ordered)
    # The dendrogram puts leaf i at 5 + 10*i along the leaf axis.
    rows = [(5 + 10 * idx, name) for idx, name in enumerate(ordered)]

    def _colour_for(name):
        # Species without a known kingdom fall back to the ink colour.
        return KINGDOM_COLOR.get(kingdom_of.get(name), INK)

    def _prepare_track(ax, x_max):
        # Every track uses the same leaf axis, flipped so leaf 0 is on top.
        ax.set_xlim(0, x_max)
        ax.set_ylim(0, n_leaves * 10)
        ax.invert_yaxis()

    # Layout: tree (left) | label | kingdom chip | count bar
    fig = plt.figure(figsize=(13, 9))
    grid = fig.add_gridspec(
        1, 4, width_ratios=[5, 2.5, 0.7, 3.0], wspace=0.05,
    )
    ax_tree = fig.add_subplot(grid[0])
    ax_label, ax_chip, ax_count = (
        fig.add_subplot(grid[col], sharey=ax_tree) for col in (1, 2, 3)
    )

    # --- tree column (rectangular, right-oriented) ---
    dendrogram(
        linkage,
        ax=ax_tree,
        orientation="right",
        labels=names,
        color_threshold=0,
        above_threshold_color=SOFT,
        no_labels=True,
        link_color_func=lambda _: SOFT,
    )
    ax_tree.set_xlabel("cosine distance", fontsize=8, color=MUTED)
    for side in ("top", "right", "left"):
        ax_tree.spines[side].set_visible(False)
    ax_tree.spines["bottom"].set_color(GRID)
    ax_tree.tick_params(axis="x", colors=MUTED, labelsize=7, length=2)
    ax_tree.tick_params(axis="y", length=0, labelleft=False)
    ax_tree.grid(axis="x", linestyle=":", color=GRID, alpha=0.7)
    ax_tree.set_axisbelow(True)
    # Flip so the tips end up on the right, next to the label column.
    ax_tree.invert_xaxis()

    # --- label column ---
    _prepare_track(ax_label, 1)
    for y, name in rows:
        ax_label.text(
            0.05, y, name,
            color=_colour_for(name),
            fontsize=12, ha="left", va="center",
        )
    ax_label.axis("off")

    # --- kingdom chip column (filled rectangle per leaf) ---
    _prepare_track(ax_chip, 1)
    for y, name in rows:
        chip = mpatches.Rectangle(
            (0.25, y - 3), 0.5, 6,
            facecolor=_colour_for(name), edgecolor="none",
        )
        ax_chip.add_patch(chip)
    ax_chip.axis("off")

    # --- count bar column (log10 of count + 1, so zero counts stay finite) ---
    log_max = np.log10(max(count_of.values()) + 1)
    _prepare_track(ax_count, log_max * 1.05)
    for y, name in rows:
        c = count_of.get(name, 0)
        bar_len = np.log10(c + 1)
        ax_count.add_patch(mpatches.Rectangle(
            (0, y - 3), bar_len, 6,
            facecolor="#d8d5c8", edgecolor="none",
        ))
        ax_count.text(
            bar_len + 0.05, y, f"{c:,}",
            color=MUTED, fontsize=9, ha="left", va="center",
        )
    for spine in ax_count.spines.values():
        spine.set_visible(False)
    ax_count.tick_params(axis="both", length=0, labelleft=False, labelbottom=False)
    ax_count.set_xlabel("sequences (log)", fontsize=8, color=MUTED)

    # --- header ---
    header_x = 0.06
    fig.text(
        header_x, 0.96,
        "§7 · CARBON SPECIES TREE",
        color="#317f3f", fontsize=10, fontweight="bold",
    )
    fig.text(
        header_x, 0.93,
        "Hierarchical clustering of mean Carbon-3B embeddings",
        color=INK, fontsize=15,
    )
    fig.text(
        header_x, 0.91,
        f"{tree['n_total_points']:,} sequences · {n_leaves} species · {tree['dim']}-dim · cosine, Ward linkage",
        color=MUTED, fontsize=9,
    )

    # --- footer kingdom legend ---
    cursor_x = 0.06
    baseline = 0.04
    for kingdom_name, swatch in KINGDOM_COLOR.items():
        fig.text(cursor_x, baseline, "■", color=swatch, fontsize=11)
        fig.text(cursor_x + 0.018, baseline, kingdom_name, color=INK, fontsize=9)
        cursor_x += 0.10

    plt.tight_layout(rect=[0.05, 0.06, 0.97, 0.89])
    plt.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    plt.close(fig)
# ----------------------------------------------------------------------
# E. editorial Hillis (thick rounded branches, airy)
# ----------------------------------------------------------------------
def _draw_link_curved(ax, x0, y0, x1, y1, color, lw):
    """Add an elbow from (x0, y0) to (x1, y1) with a softened corner.

    The link runs horizontally along ``y0``, turns at ``(x1, y0)`` and then
    runs vertically to ``y1``. The sharp turn is replaced by a quadratic
    Bezier segment whose control point is the corner itself.

    Args:
        ax: Axes that receives the patch.
        x0, y0: Start of the horizontal leg.
        x1, y1: End of the vertical leg.
        color: Edge colour of the stroke.
        lw: Line width of the stroke.
    """
    # Corner radius is 18 % of the shorter leg, so short links stay tidy.
    radius = 0.18 * min(abs(x1 - x0), abs(y1 - y0))
    # Where the straight legs hand over to the curve, on either side of
    # the corner.
    turn_x = x1 - radius if x1 > x0 else x1 + radius
    turn_y = y0 + radius if y1 > y0 else y0 - radius

    outline = Path(
        [(x0, y0), (turn_x, y0), (x1, y0), (x1, turn_y), (x1, y1)],
        [Path.MOVETO, Path.LINETO, Path.CURVE3, Path.CURVE3, Path.LINETO],
    )
    stroke = mpatches.PathPatch(
        outline,
        facecolor="none",
        edgecolor=color,
        lw=lw,
        capstyle="round",
        joinstyle="round",
    )
    ax.add_patch(stroke)
def render_editorial_hillis(tree, path):
    """Render mockup E: an airy left-to-right dendrogram with thick branches.

    Tips sit at x = 0 on the left, with italic species labels just to their
    left; the root is on the right at the largest merge distance. A scale bar
    spanning half of that distance is drawn underneath the tree.

    Args:
        tree: Mapping with the keys ``species``, ``kingdom``,
            ``linkage_ward`` and ``n_total_points``.
        path: Destination of the saved image.
    """
    species = tree["species"]
    kingdom = dict(zip(species, tree["kingdom"]))
    Z = np.array(tree["linkage_ward"])
    ddata = dendrogram(Z, no_plot=True, labels=species)
    icoord = np.array(ddata["icoord"])
    dcoord = np.array(ddata["dcoord"])
    leaf_order = ddata["ivl"]
    n = len(leaf_order)

    fig, ax = plt.subplots(figsize=(13, 10.5))
    ax.set_facecolor(PAPER)
    max_d = dcoord.max()

    # Each icoord/dcoord row describes one "U" joining two children:
    #   icoord = [posA, posA, posB, posB]      (positions along the leaf axis)
    #   dcoord = [depthA, merge, merge, depthB] (distances)
    # Here distance is drawn on x and leaf position on y, so the tree grows
    # from the tips (x = 0) towards the root (x = max_d).
    branch_style = dict(color=INK, lw=2.4, solid_capstyle="round")
    for pos, dist in zip(icoord, dcoord):
        y_a, y_b = pos[0], pos[3]
        merge_x = dist[1]
        # bar joining the two children at the merge distance
        ax.plot([merge_x, merge_x], [y_a, y_b], **branch_style)
        # arms running from each child's own depth up to the merge distance
        ax.plot([dist[0], merge_x], [y_a, y_a], **branch_style)
        ax.plot([dist[3], dist[2]], [y_b, y_b], **branch_style)

    # The dendrogram puts leaf i at 5 + 10*i along the leaf axis.
    label_x = -max_d * 0.04  # a touch left of the tips at x = 0
    for i, sp in enumerate(leaf_order):
        ax.text(
            label_x, 5 + 10 * i, sp,
            color=KINGDOM_COLOR.get(kingdom.get(sp), INK),
            fontsize=14, ha="right", va="center",
            fontstyle="italic",
        )

    # Default x direction already puts tips on the left and root on the
    # right; the negative margin leaves room for the labels.
    ax.set_xlim(-max_d * 0.30, max_d * 1.05)

    for spine in ("top", "right", "left", "bottom"):
        ax.spines[spine].set_visible(False)
    ax.set_xticks([])
    ax.set_yticks([])

    # Distance scale below the last leaf
    bar_y = n * 10 + 8
    bar_x_start = 0
    bar_x_end = max_d * 0.5
    ax.plot([bar_x_start, bar_x_end], [bar_y, bar_y], color=MUTED, lw=1)
    ax.text((bar_x_start + bar_x_end) / 2, bar_y + 4,
            f"{bar_x_end:.4f} cosine distance",
            color=MUTED, fontsize=8, ha="center", va="top")

    # Final y range includes the scale bar; inverted so leaf 0 is at the top.
    ax.set_ylim(-5, n * 10 + 18)
    ax.invert_yaxis()

    # Header
    fig.text(
        0.05, 0.96,
        "§7 · CARBON TREE OF LIFE",
        color="#317f3f", fontsize=10, fontweight="bold",
    )
    # The sequence count comes from the data, as in the other renderers,
    # instead of being frozen into the headline.
    fig.text(
        0.05, 0.93,
        f"What {tree['n_total_points']:,} sequences taught the model about who's related to whom",
        color=INK, fontsize=16,
    )
    fig.text(
        0.05, 0.91,
        "Mean-pooled Carbon-3B embeddings, hierarchically clustered (Ward, cosine)",
        color=MUTED, fontsize=10,
    )

    # Footer kingdom legend
    legend_y = 0.04
    legend_x = 0.05
    for kname, kcolor in KINGDOM_COLOR.items():
        fig.text(legend_x, legend_y, "●", color=kcolor, fontsize=12)
        fig.text(legend_x + 0.013, legend_y, kname, color=INK, fontsize=10)
        legend_x += 0.115

    plt.tight_layout(rect=[0.04, 0.06, 0.96, 0.89])
    plt.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    plt.close(fig)
# ----------------------------------------------------------------------
# F. radial with kingdom background bands
# ----------------------------------------------------------------------
def render_radial_bands(tree, path):
    """Render mockup F: a radial dendrogram with kingdom background bands.

    Leaves are spread clockwise from north over the full circle, each centred
    in its own equal sector. Contiguous runs of leaves of the same kingdom get
    a pale wedge behind their labels, and labels are written along the radius.

    Args:
        tree: Mapping with the keys ``species``, ``kingdom``,
            ``linkage_ward``, ``n_total_points`` and ``dim``.
        path: Destination of the saved image.
    """
    species = tree["species"]
    kingdom = dict(zip(species, tree["kingdom"]))
    Z = np.array(tree["linkage_ward"])
    ddata = dendrogram(Z, no_plot=True, labels=species)
    leaf_order = ddata["ivl"]
    n = len(leaf_order)
    icoord = np.array(ddata["icoord"])
    dcoord = np.array(ddata["dcoord"])
    max_d = dcoord.max()

    # The dendrogram puts leaf i at 5 + 10*i, so the leaf axis spans
    # [0, 10*n]. Mapping that whole span onto the circle centres every leaf
    # in a sector of width 2*pi/n. Stretching [first leaf, last leaf] onto
    # [0, 2*pi] instead would put the first and last leaf at the same angle.
    sector = 2 * np.pi / n

    def x_to_angle(x):
        return x * sector / 10

    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(111, projection="polar")
    ax.set_facecolor(PAPER)
    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)
    ax.set_rlim(0, max_d * 1.55)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.spines["polar"].set_visible(False)
    ax.grid(False)

    # ----- background kingdom bands -----
    band_inner = max_d * 1.10
    band_outer = max_d * 1.50
    leaf_angles = [x_to_angle(5 + 10 * i) for i in range(n)]
    # Half a sector on each side so a band covers its leaves completely.
    half_step = sector / 2

    # Contiguous runs of the same kingdom: (kingdom, start_angle, end_angle)
    runs = []
    start = 0
    for idx in range(1, n + 1):
        run_kingdom = kingdom.get(leaf_order[start])
        if idx == n or kingdom.get(leaf_order[idx]) != run_kingdom:
            runs.append((
                run_kingdom,
                leaf_angles[start] - half_step,
                leaf_angles[idx - 1] + half_step,
            ))
            start = idx

    n_band_seg = 64
    for k, a0, a1 in runs:
        if k is None:
            # species without a kingdom get no band
            continue
        thetas = np.linspace(a0, a1, n_band_seg)
        # outer arc forward, inner arc backward -> closed polygon
        thetas_full = np.concatenate([thetas, thetas[::-1]])
        rs_full = np.concatenate([
            np.full(n_band_seg, band_outer),
            np.full(n_band_seg, band_inner),
        ])
        ax.fill(thetas_full, rs_full,
                facecolor=KINGDOM_BG.get(k, "#eee"),
                edgecolor="none", zorder=1)

    # ----- tree branches -----
    n_arc_seg = 60
    for xs, ys in zip(icoord, dcoord):
        a = [x_to_angle(x) for x in xs]
        # child arms: radial segments from each child's depth to the merge
        ax.plot([a[0], a[0]], [ys[0], ys[1]], color=INK, lw=1.6, zorder=2)
        ax.plot([a[3], a[3]], [ys[3], ys[2]], color=INK, lw=1.6, zorder=2)
        # merge bar: arc at the merge radius between the two children
        arc_a = np.linspace(a[0], a[3], n_arc_seg)
        ax.plot(arc_a, np.full(n_arc_seg, ys[1]), color=INK, lw=1.6, zorder=2)

    # ----- labels in periphery, oriented radially -----
    # Text rotation is measured on screen, counter-clockwise from horizontal.
    # With theta = 0 at north and increasing clockwise, the outward direction
    # at `deg` is 90 - deg on screen. On the left half the text is turned by
    # a further 180 degrees and right-aligned so it stays upright while still
    # extending outward from the anchor.
    label_r = max_d * 1.08
    for a, sp in zip(leaf_angles, leaf_order):
        deg = np.degrees(a) % 360
        if deg <= 180:
            ha, rotation = "left", 90 - deg
        else:
            ha, rotation = "right", 270 - deg
        ax.text(
            a, label_r, sp,
            color=KINGDOM_COLOR.get(kingdom.get(sp), INK),
            fontsize=11, ha=ha, va="center",
            rotation=rotation, rotation_mode="anchor",
            zorder=3,
        )

    # ----- header (text outside polar axes via figure coords) -----
    fig.text(
        0.5, 0.98,
        "§7 · CARBON TREE OF LIFE",
        color="#317f3f", fontsize=10, fontweight="bold",
        ha="center",
    )
    # Species count comes from the data rather than a fixed number.
    fig.text(
        0.5, 0.955,
        f"{n} species clustered by mean embedding similarity",
        color=INK, fontsize=15, ha="center",
    )
    fig.text(
        0.5, 0.935,
        f"{tree['n_total_points']:,} sequences · {tree['dim']}-dim · cosine, Ward linkage",
        color=MUTED, fontsize=9, ha="center",
    )

    # ----- footer kingdom legend -----
    legend_y = 0.03
    legend_x = 0.5 - 0.45
    for kname, kcolor in KINGDOM_COLOR.items():
        fig.text(legend_x, legend_y, "●", color=kcolor, fontsize=12)
        fig.text(legend_x + 0.013, legend_y, kname, color=INK, fontsize=9)
        legend_x += 0.13

    plt.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    plt.close(fig)
def main():
    """Load the species tree and render mockups D, E and F into ``OUT``."""
    tree = load_tree()
    jobs = (
        ("D_tree_with_tracks.png", render_tree_with_tracks),
        ("E_editorial_hillis.png", render_editorial_hillis),
        ("F_radial_bands.png", render_radial_bands),
    )
    # Resolve every output path up front, then announce and render in order.
    targets = [(os.path.join(OUT, filename), render) for filename, render in jobs]
    for target, render in targets:
        print(f"rendering → {target}")
        render(tree, target)
    print(f"\nDone. Open {OUT}/")


if __name__ == "__main__":
    main()