# Catalogue of the methods described by this module. Each entry carries a
# short machine-readable id, a display name, the thesis section it refers to,
# and a prose description.
METHODS = [
    {
        "id": "coins",
        "name": "COINs - Knowledge Graph Reasoning",
        "thesis_section": "3.1",
        "description": (
            "Community-Informed Graph Embeddings (COINs) for scalable knowledge graph link prediction "
            "and complex query answering. Uses community detection to localize embedding computation, "
            "achieving significant speedups over full-graph methods."
        ),
    },
    {
        "id": "multiproxan",
        "name": "MultiProxAn - Graph Generation",
        "thesis_section": "4.3",
        "description": (
            "Discrete denoising diffusion model for graph generation with MultiProx sampling. "
            "Generates molecular graphs (QM9) and synthetic community graphs using iterative "
            "multi-measurement Gibbs sampling for improved sample quality."
        ),
    },
    {
        "id": "kg_anomaly",
        "name": "KG Anomaly Correction",
        "thesis_section": "4.4",
        "description": (
            "Diffusion-based knowledge graph subgraph correction. Applies the DiGress denoising "
            "diffusion model to knowledge graph subgraphs to detect and correct anomalous edges."
        ),
    },
]

# Knowledge-graph datasets for COINs, keyed by a short dataset key. Each value
# holds the display name, a description, and the dataset's directory name.
COINS_DATASET_META = {
    "freebase": {
        "name": "FB15k-237",
        "description": "Subset of Freebase knowledge base with 237 relation types",
        "data_dir": "FB15k-237",
    },
    "wordnet": {
        "name": "WN18RR",
        "description": "Subset of WordNet lexical database with 11 relation types",
        "data_dir": "WN18RR",
    },
    "nell": {
        "name": "NELL-995",
        "description": "Never-Ending Language Learner knowledge base with 200 relation types",
        "data_dir": "NELL-995",
    },
}

# Embedding models listed for COINs. "supported_query_structures" holds ids
# from QUERY_STRUCTURES; only Query2Box lists more than the single-hop "1p".
COINS_MODELS = [
    {
        "algorithm": "transe",
        "name": "TransE",
        "description": "Translation-based embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "distmult",
        "name": "DistMult",
        "description": "Bilinear diagonal embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "complex",
        "name": "ComplEx",
        "description": "Complex-valued embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "rotate",
        "name": "RotatE",
        "description": "Rotation-based embedding model in complex space",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "q2b",
        "name": "Query2Box",
        "description": "Box embedding model for complex logical queries",
        "supported_query_structures": ["1p", "2p", "3p", "2i", "3i", "ip", "pi"],
    },
    {
        "algorithm": "kbgat",
        "name": "KBGAT",
        "description": "Knowledge base graph attention network",
        "supported_query_structures": ["1p"],
    },
]


def _node(node_id, role, label):
    """Build one query-graph node dict (keys in order: id, type, label)."""
    return {"id": node_id, "type": role, "label": label}


def _edge(edge_id, source, target, label):
    """Build one query-graph edge dict (keys in order: id, source, target, label)."""
    return {"id": edge_id, "source": source, "target": target, "label": label}


# Query-graph templates. Every structure has exactly one "target" node with
# id "t" and label "?"; the remaining nodes are "anchor" or "variable" nodes.
# Each helper call returns a fresh dict, so no node/edge object is shared
# between structures.
QUERY_STRUCTURES = [
    {
        "id": "1p",
        "name": "Single Hop",
        "description": "Direct link prediction: who/what is connected to the anchor via this relation?",
        "nodes": [
            _node("a", "anchor", "Anchor"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a", "t", "Relation"),
        ],
    },
    {
        "id": "2p",
        "name": "Two Hop",
        "description": "Two-step chain: anchor -> variable -> target",
        "nodes": [
            _node("a", "anchor", "Anchor"),
            _node("v1", "variable", "Variable"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a", "v1", "Relation 1"),
            _edge("r2", "v1", "t", "Relation 2"),
        ],
    },
    {
        "id": "3p",
        "name": "Three Hop",
        "description": "Three-step chain: anchor -> v1 -> v2 -> target",
        "nodes": [
            _node("a", "anchor", "Anchor"),
            _node("v1", "variable", "Variable 1"),
            _node("v2", "variable", "Variable 2"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a", "v1", "Relation 1"),
            _edge("r2", "v1", "v2", "Relation 2"),
            _edge("r3", "v2", "t", "Relation 3"),
        ],
    },
    {
        "id": "2i",
        "name": "Two Intersection",
        "description": "Intersection of two single-hop queries sharing the same target",
        "nodes": [
            _node("a1", "anchor", "Anchor 1"),
            _node("a2", "anchor", "Anchor 2"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a1", "t", "Relation 1"),
            _edge("r2", "a2", "t", "Relation 2"),
        ],
    },
    {
        "id": "3i",
        "name": "Three Intersection",
        "description": "Intersection of three single-hop queries sharing the same target",
        "nodes": [
            _node("a1", "anchor", "Anchor 1"),
            _node("a2", "anchor", "Anchor 2"),
            _node("a3", "anchor", "Anchor 3"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a1", "t", "Relation 1"),
            _edge("r2", "a2", "t", "Relation 2"),
            _edge("r3", "a3", "t", "Relation 3"),
        ],
    },
    {
        "id": "ip",
        "name": "Intersection then Projection",
        "description": "Two anchors intersect, then the result projects via a third relation to the target",
        "nodes": [
            _node("a1", "anchor", "Anchor 1"),
            _node("a2", "anchor", "Anchor 2"),
            _node("v1", "variable", "Variable"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a1", "v1", "Relation 1"),
            _edge("r2", "a2", "v1", "Relation 2"),
            _edge("r3", "v1", "t", "Relation 3"),
        ],
    },
    {
        "id": "pi",
        "name": "Projection then Intersection",
        "description": "One anchor projects then intersects with a direct connection from a second anchor",
        "nodes": [
            _node("a1", "anchor", "Anchor 1"),
            _node("v1", "variable", "Variable"),
            _node("a2", "anchor", "Anchor 2"),
            _node("t", "target", "?"),
        ],
        "edges": [
            _edge("r1", "a1", "v1", "Relation 1"),
            _edge("r2", "v1", "t", "Relation 2"),
            _edge("r3", "a2", "t", "Relation 3"),
        ],
    },
]

# Datasets for graph generation, keyed by a short dataset key. "node_types"
# and "edge_types" list the categorical labels; both edge lists start with
# "none".
GRAPHGEN_DATASETS = {
    "qm9": {
        "name": "QM9",
        "type": "molecular",
        "description": "Small organic molecules with up to 9 heavy atoms (C, N, O, F)",
        "node_types": ["C", "N", "O", "F"],
        "edge_types": ["none", "single", "double", "triple", "aromatic"],
        "max_nodes": 9,
    },
    "comm20": {
        "name": "Community20",
        "type": "synthetic",
        "description": "Synthetic community-structured graphs with 12-20 nodes",
        "node_types": ["node"],
        "edge_types": ["none", "edge"],
        "max_nodes": 20,
    },
}


def _param(name, kind, description, default, lower, upper):
    """Build one sampling-parameter spec.

    Keys are emitted in the order name, type, description, default, min, max.
    """
    return {
        "name": name,
        "type": kind,
        "description": description,
        "default": default,
        "min": lower,
        "max": upper,
    }


# Sampling modes for graph generation, each with the parameters it accepts.
# Both modes declare "diffusion_steps" with identical bounds; the spec is
# built separately for each mode so the two dicts are independent objects.
GRAPHGEN_SAMPLING_MODES = [
    {
        "id": "standard",
        "name": "Standard Denoising",
        "description": "Iterative denoising from T to 0. Full quality, slower.",
        "parameters": [
            _param("diffusion_steps", "integer", "Number of diffusion steps T", 500, 50, 1000),
            _param("chain_frames", "integer", "Number of denoising snapshots in the GIF", 20, 10, 30),
        ],
    },
    {
        "id": "multiprox",
        "name": "MultiProx Sampling",
        "description": (
            "Multi-measurement Gibbs sampling with proximal steps. "
            "Step-by-step generation with controllable noise levels."
        ),
        "parameters": [
            _param("diffusion_steps", "integer", "Number of diffusion steps T", 500, 50, 1000),
            _param("m", "integer", "Number of parallel samples per multi-measurement step", 10, 2, 100),
            _param("t", "float", "First noise level (normalized, 0-1)", 0.5, 0.0, 1.0),
            _param(
                "t_prime",
                "float",
                "Second noise level (normalized, 0-1). Must satisfy t_prime <= t.",
                0.1,
                0.0,
                1.0,
            ),
        ],
    },
]

# --- COINs predict helpers ---

# Translates the query-structure ids used in QUERY_STRUCTURES into a second
# naming scheme. Only "ip" and "pi" change ("2i1p" and "1p2i"); every other id
# maps to itself.
QUERY_STRUCTURE_INTERNAL = {
    "1p": "1p",
    "2p": "2p",
    "3p": "3p",
    "2i": "2i",
    "3i": "3i",
    "ip": "2i1p",
    "pi": "1p2i",
}

# Maps API node/edge IDs to query tree node/edge indices.
# Anchor/variable node IDs → tree vertex index; edge IDs → tree edge index.
# Key insight: edge_index == target_node_index in all COINs query trees.
# Per query structure: integer index for each anchor/variable node ID and for
# each edge ID. The target node "t" never appears under "nodes". In every
# entry, an edge ID carries the same index as the node that QUERY_STRUCTURES
# lists as that edge's "source" (e.g. "r1" and its source anchor are both 0).
QUERY_TREE_MAPPINGS = {
    "1p": {
        "nodes": {"a": 0},
        "edges": {"r1": 0},
    },
    "2p": {
        "nodes": {"a": 0, "v1": 1},
        "edges": {"r1": 0, "r2": 1},
    },
    "3p": {
        "nodes": {"a": 0, "v1": 1, "v2": 2},
        "edges": {"r1": 0, "r2": 1, "r3": 2},
    },
    # Intersection branches use even indices (0, 2, 4); the odd indices are
    # not assigned to any API ID in this table.
    "2i": {
        "nodes": {"a1": 0, "a2": 2},
        "edges": {"r1": 0, "r2": 2},
    },
    "3i": {
        "nodes": {"a1": 0, "a2": 2, "a3": 4},
        "edges": {"r1": 0, "r2": 2, "r3": 4},
    },
    "ip": {
        "nodes": {"a1": 0, "a2": 2, "v1": 4},
        "edges": {"r1": 0, "r2": 2, "r3": 4},
    },
    # Index 2 is skipped here: the second anchor sits at 3.
    "pi": {
        "nodes": {"a1": 0, "v1": 1, "a2": 3},
        "edges": {"r1": 0, "r2": 1, "r3": 3},
    },
}

# String suffix per COINs algorithm key. "transe" has an empty suffix, and
# "kbgat" maps to "_gnn" rather than a suffix derived from its own key.
COINS_CONFIG_SUFFIX = {
    "transe": "",
    "distmult": "_distmult",
    "complex": "_complex",
    "rotate": "_rotate",
    "q2b": "_q2b",
    "kbgat": "_gnn",
}

# Display metadata for the KG anomaly-correction datasets, keyed by a short
# dataset key. Each entry has only a name and a description.
KG_ANOMALY_DATASET_META = {
    "freebase": {
        "name": "FB15k-237",
        "description": "Diffusion model trained on Freebase subgraphs",
    },
    "wordnet": {
        "name": "WN18RR",
        "description": "Diffusion model trained on WordNet subgraphs",
    },
    "nell": {
        "name": "NELL-995",
        "description": "Diffusion model trained on NELL subgraphs",
    },
}