Spaces:
Running
Running
"""Round 2 of species tree mockups, focusing on visual quality.

D. tree + alignment tracks (iTOL / Nature 2025 style)
E. editorial Hillis (rounded thick branches, airy)
F. radial with kingdom arcs as background bands
"""
| import json | |
| import os | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import matplotlib.patches as mpatches | |
| from matplotlib.path import Path | |
| from matplotlib import font_manager | |
| import numpy as np | |
| from scipy.cluster.hierarchy import dendrogram, leaves_list | |
# Filesystem layout: this script's directory, the sibling "data" directory one
# level up, and the "mockups" output directory inside it (created on import).
HERE = os.path.split(os.path.abspath(__file__))[0]
DATA = os.path.join(os.path.split(HERE)[0], "data")
OUT = os.path.join(DATA, "mockups")
os.makedirs(OUT, exist_ok=True)
# Page-level colours shared by every mockup.
PAPER, INK = "#fbfaf6", "#1f1f1d"
MUTED, SOFT, GRID = "#888888", "#bbb8ad", "#e5e3da"

# Less saturated kingdom palette (Krzywinski-style: muted, only one accent).
# Insertion order matters: the figure legends iterate this mapping.
KINGDOM_COLOR = dict(
    vertebrates="#1f1f1d",  # ink (the "main" cohort)
    invertebrates="#7a6242",
    plants="#317f3f",
    fungi="#a9762f",
    bacteria="#b00020",
    viruses="#2c5aa0",
)

# Very pale background tints for kingdom bands, keyed like KINGDOM_COLOR.
KINGDOM_BG = dict(
    vertebrates="#ecebe5",
    invertebrates="#ece4d6",
    plants="#e3eee2",
    fungi="#f1e7d2",
    bacteria="#f3dcd8",
    viruses="#dde5f0",
)
def setup_font():
    """Select the first available preferred monospace font for all figures.

    Candidates are tried in preference order. An exact family-name match wins;
    otherwise the first installed family whose name contains the candidate is
    used. ``font.family`` is always set to a family name that is actually
    registered with matplotlib's font manager, never to a bare candidate name
    that only matched as a substring. If nothing matches, rcParams are left
    untouched.
    """
    installed = [font.name for font in font_manager.fontManager.ttflist]
    for wanted in ("JetBrains Mono", "Menlo", "Monaco", "DejaVu Sans Mono"):
        if wanted in installed:
            plt.rcParams["font.family"] = wanted
            return
        # Fall back to a variant family (name contains the candidate), using
        # the variant's real name so matplotlib can resolve it.
        variant = next((family for family in installed if wanted in family), None)
        if variant is not None:
            plt.rcParams["font.family"] = variant
            return
# Pick the font first, then paint axes, figure and saved output in PAPER.
setup_font()
plt.rcParams.update({
    "axes.facecolor": PAPER,
    "figure.facecolor": PAPER,
    "savefig.facecolor": PAPER,
})
# Fixed ordering of the vertebrate species names.
NCBI_VERTEBRATE_ORDER = [
    "human",
    "macaque",
    "mouse",
    "rat",
    "dog",
    "cow",
    "pig",
    "chicken",
    "frog",
    "zebrafish",
]
def load_tree():
    """Load the species-tree payload from ``species_tree.json`` under ``DATA``.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    tree_path = os.path.join(DATA, "species_tree.json")
    # Read as UTF-8 explicitly rather than relying on the platform's locale
    # encoding, which is what open() uses when no encoding is given.
    with open(tree_path, encoding="utf-8") as f:
        return json.load(f)
# ----------------------------------------------------------------------
# D. tree + alignment tracks (iTOL / Nature)
# ----------------------------------------------------------------------
def render_tree_with_tracks(tree, path):
    """Render mockup D: a rectangular dendrogram with aligned per-species tracks.

    Four side-by-side panels share one y-axis so each species occupies a single
    row: the tree, the species label, a kingdom colour chip, and a log-scaled
    sequence-count bar annotated with the raw count.

    Args:
        tree: Mapping providing ``species``, ``kingdom`` and ``counts`` (the
            latter two are zipped against ``species``), ``linkage_ward`` (a
            linkage matrix passed to ``scipy.cluster.hierarchy.dendrogram``),
            and ``n_total_points`` / ``dim`` for the subtitle.
        path: Destination passed to ``Figure.savefig``.
    """
    species = tree["species"]
    kingdom = dict(zip(species, tree["kingdom"]))
    counts = dict(zip(species, tree["counts"]))
    Z = np.array(tree["linkage_ward"])

    # Resolve the dendrogram leaf order so the tracks can be aligned to it.
    leaf_order = dendrogram(Z, no_plot=True, labels=species)["ivl"]
    n = len(leaf_order)
    # Each leaf position from dendrogram is 5, 15, 25... (5 + 10*i).
    leaf_y = [5 + 10 * i for i in range(n)]

    # Layout: tree (left) | label | kingdom chip | count bar.
    fig = plt.figure(figsize=(13, 9))
    try:
        gs = fig.add_gridspec(
            1, 4, width_ratios=[5, 2.5, 0.7, 3.0], wspace=0.05,
        )
        ax_tree = fig.add_subplot(gs[0])
        ax_label = fig.add_subplot(gs[1], sharey=ax_tree)
        ax_chip = fig.add_subplot(gs[2], sharey=ax_tree)
        ax_count = fig.add_subplot(gs[3], sharey=ax_tree)

        # Tree panel: every link drawn in the same soft colour.
        dendrogram(
            Z, ax=ax_tree, orientation="right",
            labels=species, color_threshold=0,
            above_threshold_color=SOFT, no_labels=True,
            link_color_func=lambda k: SOFT,
        )
        ax_tree.set_xlabel("cosine distance", fontsize=8, color=MUTED)
        for side in ("top", "right", "left"):
            ax_tree.spines[side].set_visible(False)
        ax_tree.spines["bottom"].set_color(GRID)
        ax_tree.tick_params(axis="x", colors=MUTED, labelsize=7, length=2)
        ax_tree.tick_params(axis="y", length=0, labelleft=False)
        ax_tree.grid(axis="x", linestyle=":", color=GRID, alpha=0.7)
        ax_tree.set_axisbelow(True)
        ax_tree.invert_xaxis()  # tip on the right, root on the left

        # Count bars are log-scaled because counts span orders of magnitude
        # (human=59K, hiv1=10).
        log_max = np.log10(max(counts.values()) + 1)

        ax_label.set_xlim(0, 1)
        ax_chip.set_xlim(0, 1)
        ax_count.set_xlim(0, log_max * 1.05)
        # Inverted y-range: first leaf at the top of every track.
        for track in (ax_label, ax_chip, ax_count):
            track.set_ylim(n * 10, 0)

        for y, sp in zip(leaf_y, leaf_order):
            colour = KINGDOM_COLOR.get(kingdom.get(sp), INK)

            # Species name, tinted by kingdom.
            ax_label.text(
                0.05, y, sp,
                color=colour, fontsize=12, ha="left", va="center",
            )

            # Kingdom chip (filled rectangle).
            ax_chip.add_patch(mpatches.Rectangle(
                (0.25, y - 3), 0.5, 6,
                facecolor=colour, edgecolor="none",
            ))

            # Count bar plus the raw count printed just past its end.
            c = counts.get(sp, 0)
            log_c = np.log10(c + 1)
            ax_count.add_patch(mpatches.Rectangle(
                (0, y - 3), log_c, 6,
                facecolor="#d8d5c8", edgecolor="none",
            ))
            ax_count.text(
                log_c + 0.05, y, f"{c:,}",
                color=MUTED, fontsize=9, ha="left", va="center",
            )

        ax_label.axis("off")
        ax_chip.axis("off")
        for spine in ax_count.spines.values():
            spine.set_visible(False)
        ax_count.tick_params(
            axis="both", length=0, labelleft=False, labelbottom=False,
        )
        ax_count.set_xlabel("sequences (log)", fontsize=8, color=MUTED)

        # Header
        fig.text(
            0.06, 0.96,
            "§7 · CARBON SPECIES TREE",
            color="#317f3f", fontsize=10, fontweight="bold",
        )
        fig.text(
            0.06, 0.93,
            "Hierarchical clustering of mean Carbon-3B embeddings",
            color=INK, fontsize=15,
        )
        fig.text(
            0.06, 0.91,
            f"{tree['n_total_points']:,} sequences · {n} species · {tree['dim']}-dim · cosine, Ward linkage",
            color=MUTED, fontsize=9,
        )

        # Footer kingdom legend, laid out left to right in palette order.
        legend_y = 0.04
        legend_x = 0.06
        for kname, kcolor in KINGDOM_COLOR.items():
            fig.text(legend_x, legend_y, "■", color=kcolor, fontsize=11)
            fig.text(legend_x + 0.018, legend_y, kname, color=INK, fontsize=9)
            legend_x += 0.10

        # Operate on this figure explicitly instead of pyplot's "current
        # figure", which is implicit global state.
        fig.tight_layout(rect=[0.05, 0.06, 0.97, 0.89])
        fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)
# ----------------------------------------------------------------------
# E. editorial Hillis (thick rounded branches, airy)
# ----------------------------------------------------------------------
def _draw_link_curved(ax, x0, y0, x1, y1, color, lw):
    """Add an unfilled elbow path from (x0, y0) to (x1, y1) with a soft corner.

    The path runs horizontally from (x0, y0) towards x1, bends through a
    quadratic Bezier segment whose control point is the sharp corner
    (x1, y0), then continues vertically to (x1, y1). The bend size is 18% of
    the shorter of the two legs. The patch is added to ``ax`` with round caps
    and joins; nothing is returned.
    """
    radius = min(abs(x1 - x0), abs(y1 - y0)) * 0.18

    # Step back from the corner along each leg, towards the start/end point.
    bend_start_x = x1 - radius if x1 > x0 else x1 + radius
    bend_end_y = y0 + radius if y1 > y0 else y0 - radius

    # (code, vertex) pairs; the two CURVE3 entries form one quadratic segment
    # (control point, then end point).
    steps = (
        (Path.MOVETO, (x0, y0)),
        (Path.LINETO, (bend_start_x, y0)),
        (Path.CURVE3, (x1, y0)),
        (Path.CURVE3, (x1, bend_end_y)),
        (Path.LINETO, (x1, y1)),
    )
    elbow = Path(
        [vertex for _, vertex in steps],
        [code for code, _ in steps],
    )
    patch = mpatches.PathPatch(
        elbow,
        facecolor="none",
        edgecolor=color,
        lw=lw,
        capstyle="round",
        joinstyle="round",
    )
    ax.add_patch(patch)
def render_editorial_hillis(tree, path):
    """Render mockup E: an airy, editorial-style tree with thick round-capped branches.

    The tree grows left to right: x is merge distance (tips at x=0, root at
    the largest distance) and y is the dendrogram leaf position, with the
    first leaf at the top. Italic species labels sit just left of the tips,
    coloured by kingdom, and a scale bar is drawn below the tree.

    Args:
        tree: Mapping providing ``species``, ``kingdom`` (zipped against
            ``species``), ``linkage_ward`` (a linkage matrix passed to
            ``scipy.cluster.hierarchy.dendrogram``) and ``n_total_points``
            (shown in the headline).
        path: Destination passed to ``Figure.savefig``.
    """
    species = tree["species"]
    kingdom = dict(zip(species, tree["kingdom"]))
    Z = np.array(tree["linkage_ward"])
    ddata = dendrogram(Z, no_plot=True, labels=species)
    icoord = np.array(ddata["icoord"])
    dcoord = np.array(ddata["dcoord"])
    leaf_order = ddata["ivl"]
    n = len(leaf_order)
    max_d = dcoord.max()

    fig, ax = plt.subplots(figsize=(13, 10.5))
    try:
        ax.set_facecolor(PAPER)

        # Each row of icoord/dcoord describes one "U" joining two children:
        # icoord holds the leaf-axis positions, dcoord the distances
        # [child L, merge, merge, child R]. Here distance is plotted on x and
        # leaf position on y.
        branch_style = dict(color=INK, lw=2.4, solid_capstyle="round")
        for leaf_pos, dist in zip(icoord, dcoord):
            y_top, y_bot = leaf_pos[0], leaf_pos[3]
            # bar at the merge distance, spanning both children
            ax.plot([dist[1], dist[1]], [y_top, y_bot], **branch_style)
            # arms from each child's own depth out to the merge
            ax.plot([dist[0], dist[1]], [y_top, y_top], **branch_style)
            ax.plot([dist[3], dist[2]], [y_bot, y_bot], **branch_style)

        # Leaf positions: dendrogram puts leaves at 5 + 10*i in icoord-space.
        leaf_y = [5 + 10 * i for i in range(n)]
        label_x = -max_d * 0.04  # a touch left of x=0
        for y, sp in zip(leaf_y, leaf_order):
            ax.text(
                label_x, y, sp,
                color=KINGDOM_COLOR.get(kingdom.get(sp), INK),
                fontsize=14, ha="right", va="center",
                fontstyle="italic",
            )

        # Default x direction already puts tips (x=0) on the left and the
        # root on the right; the negative margin leaves room for the labels.
        ax.set_xlim(-max_d * 0.30, max_d * 1.05)

        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)
        ax.set_xticks([])
        ax.set_yticks([])

        # Distance scale below the last leaf.
        bar_y = n * 10 + 8
        bar_x_start = 0
        bar_x_end = max_d * 0.5
        ax.plot([bar_x_start, bar_x_end], [bar_y, bar_y], color=MUTED, lw=1)
        ax.text(
            (bar_x_start + bar_x_end) / 2, bar_y + 4,
            f"{bar_x_end:.4f} cosine distance",
            color=MUTED, fontsize=8, ha="center", va="top",
        )

        # Inverted y-range (first leaf on top) with room for the scale bar.
        ax.set_ylim(n * 10 + 18, -5)

        # Header
        fig.text(
            0.05, 0.96,
            "§7 · CARBON TREE OF LIFE",
            color="#317f3f", fontsize=10, fontweight="bold",
        )
        fig.text(
            0.05, 0.93,
            # Sequence count comes from the data, as in the other renderers,
            # so the headline cannot drift from the payload.
            f"What {tree['n_total_points']:,} sequences taught the model about who's related to whom",
            color=INK, fontsize=16,
        )
        fig.text(
            0.05, 0.91,
            "Mean-pooled Carbon-3B embeddings, hierarchically clustered (Ward, cosine)",
            color=MUTED, fontsize=10,
        )

        # Footer kingdom legend, laid out left to right in palette order.
        legend_y = 0.04
        legend_x = 0.05
        for kname, kcolor in KINGDOM_COLOR.items():
            fig.text(legend_x, legend_y, "●", color=kcolor, fontsize=12)
            fig.text(legend_x + 0.013, legend_y, kname, color=INK, fontsize=10)
            legend_x += 0.115

        # Operate on this figure explicitly instead of pyplot's "current
        # figure", which is implicit global state.
        fig.tight_layout(rect=[0.04, 0.06, 0.96, 0.89])
        fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)
# ----------------------------------------------------------------------
# F. radial with kingdom background bands
# ----------------------------------------------------------------------
def render_radial_bands(tree, path):
    """Render mockup F: a radial dendrogram with pale kingdom bands behind the labels.

    Leaves are spread evenly around a full circle (angle 0 at the top,
    increasing clockwise), merge distance is the radius, and each contiguous
    run of same-kingdom leaves gets a tinted annular wedge just outside the
    tree. Species labels start slightly inside the band and read along the
    radius.

    Args:
        tree: Mapping providing ``species``, ``kingdom`` (zipped against
            ``species``), ``linkage_ward`` (a linkage matrix passed to
            ``scipy.cluster.hierarchy.dendrogram``), and ``n_total_points`` /
            ``dim`` for the subtitle.
        path: Destination passed to ``Figure.savefig``.
    """
    species = tree["species"]
    kingdom = dict(zip(species, tree["kingdom"]))
    Z = np.array(tree["linkage_ward"])
    ddata = dendrogram(Z, no_plot=True, labels=species)
    leaf_order = ddata["ivl"]
    n = len(leaf_order)
    icoord = np.array(ddata["icoord"])
    dcoord = np.array(ddata["dcoord"])
    max_d = dcoord.max()

    # Leaf i sits at 5 + 10*i in icoord, i.e. each leaf owns a slot of width
    # 10 and the n slots cover [0, 10*n]. Map that whole span onto the circle
    # so every leaf gets its own 2*pi/n sector. (Mapping only
    # [icoord.min(), icoord.max()] onto [0, 2*pi] would place the first and
    # last leaf at the same angle.)
    leaf_axis_span = 10 * n

    def x_to_angle(x):
        return 2 * np.pi * x / leaf_axis_span

    leaf_angles = [x_to_angle(5 + 10 * i) for i in range(n)]
    # Half a leaf slot, so a band covers its first and last leaf cleanly.
    half_step = np.pi / n

    fig = plt.figure(figsize=(12, 12))
    try:
        ax = fig.add_subplot(111, projection="polar")
        ax.set_facecolor(PAPER)
        ax.set_theta_zero_location("N")
        ax.set_theta_direction(-1)
        ax.set_rlim(0, max_d * 1.55)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.spines["polar"].set_visible(False)
        ax.grid(False)

        # ----- background kingdom bands -----
        band_inner = max_d * 1.10
        band_outer = max_d * 1.50

        # Contiguous runs of the same kingdom in leaf order:
        # (kingdom, start_angle, end_angle).
        runs = []
        run_start = 0
        for idx in range(1, n + 1):
            run_kingdom = kingdom.get(leaf_order[run_start])
            if idx == n or kingdom.get(leaf_order[idx]) != run_kingdom:
                runs.append((
                    run_kingdom,
                    leaf_angles[run_start] - half_step,
                    leaf_angles[idx - 1] + half_step,
                ))
                run_start = idx

        n_band_seg = 64
        for k, a0, a1 in runs:
            if k is None:
                continue  # species without a kingdom get no band
            thetas = np.linspace(a0, a1, n_band_seg)
            # outer arc forward, inner arc backward -> closed polygon
            thetas_full = np.concatenate([thetas, thetas[::-1]])
            rs_full = np.concatenate([
                np.full(n_band_seg, band_outer),
                np.full(n_band_seg, band_inner),
            ])
            ax.fill(
                thetas_full, rs_full,
                facecolor=KINGDOM_BG.get(k, "#eee"),
                edgecolor="none", zorder=1,
            )

        # ----- tree branches -----
        n_arc_seg = 60
        for xs, ys in zip(icoord, dcoord):
            a = [x_to_angle(x) for x in xs]
            # each child: radial segment at a fixed angle, child depth -> merge
            ax.plot([a[0], a[0]], [ys[0], ys[1]], color=INK, lw=1.6, zorder=2)
            ax.plot([a[3], a[3]], [ys[3], ys[2]], color=INK, lw=1.6, zorder=2)
            # merge: arc at radius ys[1] between the two child angles
            arc_a = np.linspace(a[0], a[3], n_arc_seg)
            ax.plot(arc_a, np.full(n_arc_seg, ys[1]), color=INK, lw=1.6, zorder=2)

        # ----- labels in periphery, oriented radially -----
        # Text rotation is a screen angle. With theta zero at "N" and
        # clockwise direction, the outward radius at theta has screen angle
        # 90 - theta. On the left half the text is flipped by 180 degrees and
        # right-aligned so it stays upright while still extending outward.
        label_r = max_d * 1.08
        for a, sp in zip(leaf_angles, leaf_order):
            deg = np.degrees(a) % 360
            if deg <= 180:
                ha, rotation = "left", 90 - deg
            else:
                ha, rotation = "right", 270 - deg
            ax.text(
                a, label_r, sp,
                color=KINGDOM_COLOR.get(kingdom.get(sp), INK),
                fontsize=11, ha=ha, va="center",
                rotation=rotation, rotation_mode="anchor",
                zorder=3,
            )

        # ----- header (text outside polar axes via figure coords) -----
        fig.text(
            0.5, 0.98,
            "§7 · CARBON TREE OF LIFE",
            color="#317f3f", fontsize=10, fontweight="bold",
            ha="center",
        )
        fig.text(
            0.5, 0.955,
            # Species count comes from the data rather than a hard-coded 27.
            f"{n} species clustered by mean embedding similarity",
            color=INK, fontsize=15, ha="center",
        )
        fig.text(
            0.5, 0.935,
            f"{tree['n_total_points']:,} sequences · {tree['dim']}-dim · cosine, Ward linkage",
            color=MUTED, fontsize=9, ha="center",
        )

        # Footer kingdom legend, laid out left to right in palette order.
        legend_y = 0.03
        legend_x = 0.5 - 0.45
        for kname, kcolor in KINGDOM_COLOR.items():
            fig.text(legend_x, legend_y, "●", color=kcolor, fontsize=12)
            fig.text(legend_x + 0.013, legend_y, kname, color=INK, fontsize=9)
            legend_x += 0.13

        fig.savefig(path, dpi=150, bbox_inches="tight", facecolor=PAPER)
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)
def main():
    """Load the species tree once and render mockups D, E and F into ``OUT``.

    Each output path is printed before its renderer runs; a closing message
    points at the output directory.
    """
    tree = load_tree()
    jobs = (
        ("D_tree_with_tracks.png", render_tree_with_tracks),
        ("E_editorial_hillis.png", render_editorial_hillis),
        ("F_radial_bands.png", render_radial_bands),
    )
    targets = [
        (os.path.join(OUT, filename), renderer) for filename, renderer in jobs
    ]
    for target, renderer in targets:
        print(f"rendering → {target}")
        renderer(tree, target)
    print(f"\nDone. Open {OUT}/")


if __name__ == "__main__":
    main()