"""Figure 1 — Task and heterogeneous graph overview (schematic). Hand-drawn schema with matplotlib patches: author/paper lanes, the three edge types, a highlighted (author, paper) test pair and the binary output, plus a data-scale annotation bar. No real graph is plotted — this is the task definition. """ from pathlib import Path import matplotlib.pyplot as plt from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Circle, Rectangle from style import apply, save, PALETTE as C, COL2 # noqa: E402 KEY = "fig1_graph_overview" TITLE = "Figure 1. Task and heterogeneous graph overview" def make(root, out): apply() fig, ax = plt.subplots(figsize=(COL2, 3.5)) ax.set_xlim(0, 13); ax.set_ylim(0, 7); ax.axis("off") def lane(x, w, color, title): ax.add_patch(FancyBboxPatch((x, 2.2), w, 3.4, boxstyle="round,pad=0.1,rounding_size=0.2", fc=color, ec=color, alpha=0.08, lw=1.5)) ax.text(x + w / 2, 5.9, title, ha="center", fontsize=9.5, fontweight="bold", color=color) lane(0.3, 3.4, C[0], "Authors (6,611)") lane(9.1, 3.6, C[2], "Papers (79,937)") authors = [("A1", 1.1, 4.8), ("A2", 2.6, 5.0), ("A3", 1.4, 3.4), ("A4", 2.9, 3.2), ("A5", 2.0, 4.1)] papers = [("P1", 9.6, 4.9), ("P2", 11.0, 4.6), ("P3", 9.9, 3.3), ("P4", 11.2, 3.0), ("P5", 10.4, 4.0)] for n, x, y in authors: ax.add_patch(Circle((x, y), 0.30, fc=C[0], ec="white", lw=1.2, alpha=0.9)) ax.text(x, y, n, ha="center", va="center", fontsize=7.5, color="white", fontweight="bold") for n, x, y in papers: ax.add_patch(Rectangle((x - 0.30, y - 0.26), 0.60, 0.52, fc=C[2], ec="white", lw=1.2, alpha=0.9)) ax.text(x, y, n, ha="center", va="center", fontsize=7.5, color="white", fontweight="bold") def edge(a, b, color, ls, lw=1.3, arrow=False): sty = "-|>" if arrow else "-" ax.add_patch(FancyArrowPatch(a, b, arrowstyle=sty, mutation_scale=11, color=color, lw=lw, linestyle=ls, connectionstyle="arc3,rad=0.04", alpha=0.85)) # Author-Paper historical read/cite (solid blue) edge(authors[0][1:], papers[0][1:], C[0], "-") edge(authors[4][1:], papers[2][1:], C[0], "-") edge(authors[2][1:], papers[4][1:], C[0], "-") edge(authors[1][1:], papers[1][1:], C[0], "-") # Author-Author coauthor (dashed orange) edge(authors[0][1:], authors[4][1:], C[1], "--") edge(authors[2][1:], authors[3][1:], C[1], "--") # Paper -> Paper citation (dotted green, directed) edge(papers[0][1:], papers[4][1:], C[3], ":", lw=1.6, arrow=True) edge(papers[2][1:], papers[3][1:], C[3], ":", lw=1.6, arrow=True) # Highlighted TEST pair ta, tp = authors[3][1:], papers[4][1:] edge(ta, tp, C[5], "-", lw=2.6) ax.text((ta[0] + tp[0]) / 2, (ta[1] + tp[1]) / 2 + 0.45, "test pair (author, paper)", ha="center", fontsize=8, color=C[5], fontweight="bold") # Binary output badge ax.add_patch(FancyBboxPatch((tp[0] + 0.7, tp[1] - 0.25), 1.7, 0.6, boxstyle="round,pad=0.05,rounding_size=0.12", fc=C[5], ec="white")) ax.text(tp[0] + 1.55, tp[1] + 0.05, "label {0,1}", ha="center", va="center", fontsize=8, color="white", fontweight="bold") # Legend from matplotlib.lines import Line2D leg = [ Line2D([0], [0], color=C[0], lw=1.8, label="author–paper read/cite"), Line2D([0], [0], color=C[1], lw=1.8, ls="--", label="author–author coauthorship"), Line2D([0], [0], color=C[3], lw=1.8, ls=":", label="paper→paper citation (directed)"), Line2D([0], [0], color=C[5], lw=2.6, label="test pair (predict)"), ] ax.legend(handles=leg, loc="lower center", bbox_to_anchor=(0.5, -0.04), ncol=4, fontsize=7.2, handlelength=2.2, columnspacing=1.2) # Bottom scale bar ax.text(6.5, 0.45, "2,047,262 test pairs · metric: F1-score · edges: 9,663 coauthor / 327,113 citation / 682,421 read", ha="center", fontsize=7.8, color="dimgray") ax.set_title("Heterogeneous author–paper graph and link-prediction task", fontsize=10) save(fig, KEY, out) return dict(key=KEY, title=TITLE, status="ok", files=[f"{KEY}.pdf", f"{KEY}.png", f"{KEY}.svg"], sources=["schematic (no real graph)"], caption=( "Heterogeneous author–paper graph and link-prediction task. Nodes are authors " "(circles, 6,611) and papers (squares, 79,937); relations are author–paper historical " "read/cite, author–author coauthorship, and directed paper→paper citation. For each test " "pair (author, paper) the model predicts a binary label; performance is measured by F1 over " "2,047,262 test pairs.")) if __name__ == "__main__": from style import ensure_dirs r = make(Path("."), ensure_dirs(Path("."))) print(r["key"], r["status"])