File size: 8,358 Bytes
f28d994
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""Figure metadata used by the v2 plotting system."""
from __future__ import annotations

def _figure_spec(
    *,
    paper_position: str,
    main_text: bool,
    purpose: str,
    source: str,
    caption: str,
) -> dict[str, object]:
    """Return one figure's metadata record with the five standard keys.

    Keys are inserted in a fixed order (paper_position, main_text, purpose,
    source, caption) so every record in FIGURE_SPECS has the same layout.
    """
    return {
        "paper_position": paper_position,
        "main_text": main_text,
        "purpose": purpose,
        "source": source,
        "caption": caption,
    }


# Per-figure metadata keyed by figure identifier (fig1..fig6 and figA1..figA6).
# Each record carries:
#   paper_position - section label for where the figure sits in the paper
#   main_text      - True for fig1..fig6, False for the figA* entries
#   purpose        - one-sentence statement of what the figure should show
#   source         - free-text description of the inputs behind the figure
#   caption        - caption text for the figure
# NOTE(review): the consumer of this table is not visible here; presumably the
# plotting scripts look figures up by these identifiers - confirm before
# renaming any key.
FIGURE_SPECS = {
    "fig1_task_graph": _figure_spec(
        paper_position="Introduction / task definition",
        main_text=True,
        purpose="Define author-paper link prediction on the heterogeneous academic graph.",
        source="Schematic; dataset sizes from project docs and README.",
        caption=(
            "Heterogeneous author-paper graph and link-prediction task. Authors, papers, "
            "historical author-paper interactions, coauthor links, and directed paper-paper "
            "citations define the observed graph; each test author-paper pair is ranked for a "
            "binary recommendation decision."
        ),
    ),
    "fig2_dataset_sparsity": _figure_spec(
        paper_position="Dataset",
        main_text=True,
        purpose="Show sparsity, long tails, and cold-start pressure in the official graph.",
        source="data_and_docs/author_file_ann.txt, paper_file_ann.txt, bipartite_train_ann.txt",
        caption=(
            "Dataset sparsity and long-tail structure. Log-log CCDFs show heavy-tailed "
            "coauthor, citation, and author-paper degrees, while the low-degree panel shows "
            "the mass of cold-start nodes that motivates structural and high-order features."
        ),
    ),
    "fig3_performance_evolution": _figure_spec(
        paper_position="Results overview",
        main_text=True,
        purpose="Summarize the method evolution from LightGCN to the final high-order stack.",
        source="README.md, reports, validation summaries, and figures_v2/data/manual_metrics.csv",
        caption=(
            "Performance evolution across model stages. LightGCN provides the collaborative "
            "filtering backbone, graph/meta-path stacking supplies the largest jump, random-walk "
            "blocks add complementary high-order proximity, and citation-aware propagation gives "
            "the final lift to public F1 = 0.96626."
        ),
    ),
    "fig4_method_pipeline": _figure_spec(
        paper_position="Method",
        main_text=True,
        purpose="Explain the final two-stage LightGBM stacking pipeline.",
        source="README.md, CLAUDE.md, code/high_order_graph_stack.py",
        caption=(
            "Final two-stage stacking pipeline. The first stage produces collaborative, graph, "
            "content, random-walk, and citation-propagation signals; the second-stage LightGBM "
            "stacker fuses roughly 259 features and uses a rank cutoff rather than a transferred "
            "probability threshold for submission generation."
        ),
    ),
    "fig5_highorder_ablation": _figure_spec(
        paper_position="Ablation",
        main_text=True,
        purpose="Quantify the value of high-order citation propagation without dual axes.",
        source="validation_runs/dynamic_seed202/high_order_graph_stack/validation_summary.csv",
        caption=(
            "High-order propagation ablation. F1 and AUC are shown in separate panels to avoid "
            "dual-axis ambiguity. Rich content and random-walk blocks improve the stack, "
            "undirected high-order features add the largest late-stage gain, and directed "
            "citation propagation gives the final improvement."
        ),
    ),
    "fig6_calibration_rank_cutoff": _figure_spec(
        paper_position="Decision rule / results",
        main_text=True,
        purpose="Explain why rank cutoff is more robust than transferring a probability threshold.",
        source="validation_runs/stack_ratio_analysis.csv and high_order threshold summaries",
        caption=(
            "Rank cutoff versus probability-threshold transfer. The validation split is "
            "artificially balanced, so validation probabilities are not calibrated for test; "
            "a rank cutoff keeps the predicted-positive ratio fixed while the transferred "
            "probability threshold drifts to about 0.524 on test."
        ),
    ),
    "figA1_lightgcn_sweep": _figure_spec(
        paper_position="Appendix",
        main_text=False,
        purpose="Document the LightGCN layer/dimension sweep.",
        source="validation_runs/dynamic_summary.csv",
        caption="LightGCN validation sweep over propagation depth and embedding dimension.",
    ),
    "figA2_rw_ensemble": _figure_spec(
        paper_position="Appendix",
        main_text=False,
        purpose="Show random-walk ensemble-size ablation.",
        source="validation_runs/dynamic_seed202/randomwalk_systematic/*.csv",
        caption="Random-walk ensemble-size ablation from the best single block to 5 and 7 blocks.",
    ),
    "figA3_feature_group_contribution": _figure_spec(
        paper_position="Appendix",
        main_text=False,
        purpose="Summarize incremental feature-group contributions from recorded ablations.",
        source="reports and figures_v2/data/manual_metrics.csv",
        caption="Feature-group contribution measured as recorded incremental validation-F1 gains.",
    ),
    "figA4_error_buckets": _figure_spec(
        paper_position="Appendix",
        main_text=False,
        purpose="Localize remaining weak regimes without a single overlong heatmap.",
        source="validation_runs/dynamic_seed202/error_group_calibration/error_analysis_buckets.csv",
        caption=(
            "Error buckets reveal cold-start and weak-evidence regimes. The panels separate "
            "degree, rank/score, and local-evidence buckets and highlight the lowest-F1 rows."
        ),
    ),
    "figA5_oof_pr_score": _figure_spec(
        paper_position="Appendix",
        main_text=False,
        purpose="Show OOF discrimination and readable final-score distributions.",
        source="validation_runs/dynamic_seed202/*_oof.npy and val_labels_seed202.npy",
        caption=(
            "OOF precision-recall curves and final-score ECDFs. The ECDF view avoids density "
            "spikes and makes positive/negative separation readable."
        ),
    ),
    "figA6_feature_importance": _figure_spec(
        paper_position="Appendix optional",
        main_text=False,
        purpose="LightGBM model feature importance if model metadata is reliably loadable.",
        source="cached_scores/lgb_model.pkl or lgb_v2_model.pkl",
        caption="Skipped unless LightGBM and feature names are available.",
    ),
}

# Figure plan table: one 7-tuple of strings per output PDF, in figure order.
# NOTE(review): the column meanings are not named anywhere in this module; from
# the values they appear to be
#   (output file, paper section, key message, chart type, data source,
#    main-text flag "Yes"/"No"/"Optional", layout/redraw note)
# - confirm against whatever renders this table before relying on positions.
PLAN_ROWS = [
    (
        "fig1_task_graph.pdf",
        "Introduction",
        "Task formalization",
        "Schematic",
        "Project docs",
        "Yes",
        "Redraw as compact heterogeneous-graph diagram",
    ),
    (
        "fig2_dataset_sparsity.pdf",
        "Dataset",
        "Sparse long-tail graph",
        "2x2 CCDF/bar",
        "Official edge files",
        "Yes",
        "Recompute degrees from real data",
    ),
    (
        "fig3_performance_evolution.pdf",
        "Results",
        "Stage-wise improvement",
        "Line/step plot",
        "Recorded metrics",
        "Yes",
        "Use clean dual-line plot and only three annotations",
    ),
    (
        "fig4_method_pipeline.pdf",
        "Method",
        "Two-stage stacker",
        "Architecture",
        "Code/docs",
        "Yes",
        "Aligned three-column schematic",
    ),
    (
        "fig5_highorder_ablation.pdf",
        "Ablation",
        "High-order citation lift",
        "Two-panel line plot",
        "validation_summary.csv",
        "Yes",
        "Separate F1 and AUC panels",
    ),
    (
        "fig6_calibration_rank_cutoff.pdf",
        "Decision rule",
        "Rank cutoff robustness",
        "Line + grouped bars",
        "ratio/threshold CSVs",
        "Yes",
        "Show ratio sweep and test drift",
    ),
    (
        "figA1_lightgcn_sweep.pdf",
        "Appendix",
        "LightGCN config",
        "Heatmap",
        "dynamic_summary.csv",
        "No",
        "Compact heatmap with NA cells",
    ),
    (
        "figA2_rw_ensemble.pdf",
        "Appendix",
        "RW ensemble benefit",
        "Line plot",
        "RW ablation CSVs",
        "No",
        "Small labels, no large annotations",
    ),
    (
        "figA3_feature_group_contribution.pdf",
        "Appendix",
        "Feature groups",
        "Horizontal bars",
        "Recorded metrics",
        "No",
        "Short labels and bounded x-axis",
    ),
    (
        "figA4_error_buckets.pdf",
        "Appendix",
        "Weak regimes",
        "1x3 heatmap",
        "error buckets CSV",
        "No",
        "Split long heatmap into three panels",
    ),
    (
        "figA5_oof_pr_score.pdf",
        "Appendix",
        "OOF discrimination",
        "PR + ECDF",
        "OOF NPY files",
        "No",
        "Use ECDF instead of fragile density spike",
    ),
    (
        "figA6_feature_importance.pdf",
        "Appendix",
        "Feature importance",
        "Bar chart",
        "LightGBM model",
        "Optional",
        "Skip unless reliable feature names and LightGBM are available",
    ),
]