cs3319-project2 / figures_paper /scripts /fig1_graph_overview.py
NLP-beginner's picture
CS3319 Project 2 final deliverable (public F1 = 0.96626)
f28d994
Raw
History Blame Contribute Delete
4.97 kB
"""Figure 1 — Task and heterogeneous graph overview (schematic).
Hand-drawn schema with matplotlib patches: author/paper lanes, the three edge
types, a highlighted (author, paper) test pair and the binary output, plus a
data-scale annotation bar. No real graph is plotted — this is the task definition.
"""
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Circle, Rectangle
from style import apply, save, PALETTE as C, COL2 # noqa: E402
# Base name of this figure: handed to save() and used to build the output
# file names reported by make().
KEY = "fig1_graph_overview"
# Human-readable figure title, reported in the dict returned by make().
TITLE = "Figure 1. Task and heterogeneous graph overview"
def make(root, out):
    """Draw the Figure 1 schematic and hand it to the shared ``save`` helper.

    The picture is a hand-placed diagram, not a plot of real data: two tinted
    lanes (authors left, papers right), five example nodes per lane, a few
    example edges for each relation type, one emphasised (author, paper) test
    pair with its binary-label badge, a legend, and a one-line scale note.

    Args:
        root: Not used by this function.
        out: Forwarded unchanged to ``save`` as its third argument.

    Returns:
        A dict with the entries ``key``, ``title``, ``status`` (always
        ``"ok"``), ``files`` (pdf/png/svg names built from ``KEY``),
        ``sources`` and ``caption``.
    """
    from matplotlib.lines import Line2D

    apply()
    fig, ax = plt.subplots(figsize=(COL2, 3.5))
    ax.set_xlim(0, 13)
    ax.set_ylim(0, 7)
    ax.axis("off")

    # Palette slots used throughout the drawing.
    author_color = C[0]
    coauthor_color = C[1]
    paper_color = C[2]
    citation_color = C[3]
    accent = C[5]

    def draw_lane(left, width, tint, heading):
        # Faint rounded panel with a bold heading centred above it.
        panel = FancyBboxPatch((left, 2.2), width, 3.4,
                               boxstyle="round,pad=0.1,rounding_size=0.2",
                               fc=tint, ec=tint, alpha=0.08, lw=1.5)
        ax.add_patch(panel)
        ax.text(left + width / 2, 5.9, heading,
                ha="center", fontsize=9.5, fontweight="bold", color=tint)

    draw_lane(0.3, 3.4, author_color, "Authors (6,611)")
    draw_lane(9.1, 3.6, paper_color, "Papers (79,937)")

    # Node label -> (x, y) centre, in data coordinates.
    author_xy = {
        "A1": (1.1, 4.8),
        "A2": (2.6, 5.0),
        "A3": (1.4, 3.4),
        "A4": (2.9, 3.2),
        "A5": (2.0, 4.1),
    }
    paper_xy = {
        "P1": (9.6, 4.9),
        "P2": (11.0, 4.6),
        "P3": (9.9, 3.3),
        "P4": (11.2, 3.0),
        "P5": (10.4, 4.0),
    }

    node_label = dict(ha="center", va="center", fontsize=7.5,
                      color="white", fontweight="bold")

    # Authors are circles, papers are rectangles; each carries its own label.
    for tag, (cx, cy) in author_xy.items():
        ax.add_patch(Circle((cx, cy), 0.30, fc=author_color, ec="white",
                            lw=1.2, alpha=0.9))
        ax.text(cx, cy, tag, **node_label)
    for tag, (cx, cy) in paper_xy.items():
        ax.add_patch(Rectangle((cx - 0.30, cy - 0.26), 0.60, 0.52,
                               fc=paper_color, ec="white", lw=1.2, alpha=0.9))
        ax.text(cx, cy, tag, **node_label)

    def connect(start, end, color, ls, lw=1.3, arrow=False):
        # Slightly curved connector; an arrowhead is drawn only when requested.
        head = "-|>" if arrow else "-"
        ax.add_patch(FancyArrowPatch(start, end, arrowstyle=head, mutation_scale=11,
                                     color=color, lw=lw, linestyle=ls,
                                     connectionstyle="arc3,rad=0.04", alpha=0.85))

    # Author-paper historical read/cite links (solid).
    for a_tag, p_tag in (("A1", "P1"), ("A5", "P3"), ("A3", "P5"), ("A2", "P2")):
        connect(author_xy[a_tag], paper_xy[p_tag], author_color, "-")

    # Author-author coauthorship links (dashed).
    for first, second in (("A1", "A5"), ("A3", "A4")):
        connect(author_xy[first], author_xy[second], coauthor_color, "--")

    # Paper -> paper citations (dotted, with arrowhead).
    for citing, cited in (("P1", "P5"), ("P3", "P4")):
        connect(paper_xy[citing], paper_xy[cited], citation_color, ":",
                lw=1.6, arrow=True)

    # The emphasised test pair, labelled just above the midpoint of its link.
    test_author = author_xy["A4"]
    test_paper = paper_xy["P5"]
    connect(test_author, test_paper, accent, "-", lw=2.6)
    mid_x = (test_author[0] + test_paper[0]) / 2
    mid_y = (test_author[1] + test_paper[1]) / 2
    ax.text(mid_x, mid_y + 0.45, "test pair (author, paper)",
            ha="center", fontsize=8, color=accent, fontweight="bold")

    # Binary-output badge placed to the right of the test paper.
    paper_x, paper_y = test_paper
    ax.add_patch(FancyBboxPatch((paper_x + 0.7, paper_y - 0.25), 1.7, 0.6,
                                boxstyle="round,pad=0.05,rounding_size=0.12",
                                fc=accent, ec="white"))
    ax.text(paper_x + 1.55, paper_y + 0.05, "label {0,1}", ha="center", va="center",
            fontsize=8, color="white", fontweight="bold")

    # Legend built from proxy lines, one per relation type plus the test pair.
    legend_rows = [
        (dict(color=author_color, lw=1.8), "author–paper read/cite"),
        (dict(color=coauthor_color, lw=1.8, ls="--"), "author–author coauthorship"),
        (dict(color=citation_color, lw=1.8, ls=":"), "paper→paper citation (directed)"),
        (dict(color=accent, lw=2.6), "test pair (predict)"),
    ]
    handles = [Line2D([0], [0], **line_kw, label=text) for line_kw, text in legend_rows]
    ax.legend(handles=handles, loc="lower center", bbox_to_anchor=(0.5, -0.04),
              ncol=4, fontsize=7.2, handlelength=2.2, columnspacing=1.2)

    # Data-scale annotation along the bottom of the axes.
    ax.text(6.5, 0.45,
            "2,047,262 test pairs · metric: F1-score · edges: 9,663 coauthor / 327,113 citation / 682,421 read",
            ha="center", fontsize=7.8, color="dimgray")

    ax.set_title("Heterogeneous author–paper graph and link-prediction task", fontsize=10)
    save(fig, KEY, out)

    caption = (
        "Heterogeneous author–paper graph and link-prediction task. Nodes are authors "
        "(circles, 6,611) and papers (squares, 79,937); relations are author–paper historical "
        "read/cite, author–author coauthorship, and directed paper→paper citation. For each test "
        "pair (author, paper) the model predicts a binary label; performance is measured by F1 over "
        "2,047,262 test pairs."
    )
    return {
        "key": KEY,
        "title": TITLE,
        "status": "ok",
        "files": [f"{KEY}.{ext}" for ext in ("pdf", "png", "svg")],
        "sources": ["schematic (no real graph)"],
        "caption": caption,
    }
if __name__ == "__main__":
    # Standalone run: use the current directory as root and let the style
    # module's ensure_dirs supply the value passed to make() as ``out``.
    from style import ensure_dirs

    here = Path(".")
    result = make(here, ensure_dirs(here))
    print(result["key"], result["status"])