# Catalogue of the methods described by this module. Every entry is a dict
# with the same four keys:
#   "id"             - short machine-readable identifier
#   "name"           - display name
#   "thesis_section" - section number, kept as a string (e.g. "3.1")
#   "description"    - human-readable summary, built from adjacent string
#                      literals that Python concatenates at compile time
METHODS = [
    {
        "id": "coins",
        "name": "COINs - Knowledge Graph Reasoning",
        "thesis_section": "3.1",
        "description": (
            "Community-Informed Graph Embeddings (COINs) for scalable knowledge graph link prediction "
            "and complex query answering. Uses community detection to localize embedding computation, "
            "achieving significant speedups over full-graph methods."
        ),
    },
    {
        "id": "multiproxan",
        "name": "MultiProxAn - Graph Generation",
        "thesis_section": "4.3",
        "description": (
            "Discrete denoising diffusion model for graph generation with MultiProx sampling. "
            "Generates molecular graphs (QM9) and synthetic community graphs using iterative "
            "multi-measurement Gibbs sampling for improved sample quality."
        ),
    },
    {
        "id": "kg_anomaly",
        "name": "KG Anomaly Correction",
        "thesis_section": "4.4",
        "description": (
            "Diffusion-based knowledge graph subgraph correction. Applies the DiGress denoising "
            "diffusion model to knowledge graph subgraphs to detect and correct anomalous edges."
        ),
    },
]
|
|
# Metadata for the knowledge-graph datasets, keyed by a short dataset key.
# Each value holds the benchmark's display "name", a one-line "description",
# and "data_dir" (identical to "name" for every entry below).
# NOTE(review): "data_dir" is presumably a directory name resolved relative
# to some data root defined elsewhere - confirm against the loader.
COINS_DATASET_META = {
    "freebase": {
        "name": "FB15k-237",
        "description": "Subset of Freebase knowledge base with 237 relation types",
        "data_dir": "FB15k-237",
    },
    "wordnet": {
        "name": "WN18RR",
        "description": "Subset of WordNet lexical database with 11 relation types",
        "data_dir": "WN18RR",
    },
    "nell": {
        "name": "NELL-995",
        "description": "Never-Ending Language Learner knowledge base with 200 relation types",
        "data_dir": "NELL-995",
    },
}
|
|
# Embedding models listed for COINs. Each entry gives the "algorithm" key,
# a display "name", a short "description", and the query-structure ids it
# supports. Only "q2b" lists multi-hop / intersection structures; every
# other model lists just "1p".
COINS_MODELS = [
    {
        "algorithm": "transe",
        "name": "TransE",
        "description": "Translation-based embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "distmult",
        "name": "DistMult",
        "description": "Bilinear diagonal embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "complex",
        "name": "ComplEx",
        "description": "Complex-valued embedding model",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "rotate",
        "name": "RotatE",
        "description": "Rotation-based embedding model in complex space",
        "supported_query_structures": ["1p"],
    },
    {
        "algorithm": "q2b",
        "name": "Query2Box",
        "description": "Box embedding model for complex logical queries",
        "supported_query_structures": ["1p", "2p", "3p", "2i", "3i", "ip", "pi"],
    },
    {
        "algorithm": "kbgat",
        "name": "KBGAT",
        "description": "Knowledge base graph attention network",
        "supported_query_structures": ["1p"],
    },
]
|
|
# Graph templates for the query structures. Each entry describes one
# structure as a small directed graph:
#   "nodes" - dicts with "id", "type" ("anchor", "variable" or "target")
#             and a display "label"; every structure has exactly one
#             target node with id "t" and label "?"
#   "edges" - dicts with "id" ("r1", "r2", ...), "source"/"target" node ids
#             and a display "label"
QUERY_STRUCTURES = [
    {
        "id": "1p",
        "name": "Single Hop",
        "description": "Direct link prediction: who/what is connected to the anchor via this relation?",
        "nodes": [
            {"id": "a", "type": "anchor", "label": "Anchor"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a", "target": "t", "label": "Relation"},
        ],
    },
    {
        "id": "2p",
        "name": "Two Hop",
        "description": "Two-step chain: anchor -> variable -> target",
        "nodes": [
            {"id": "a", "type": "anchor", "label": "Anchor"},
            {"id": "v1", "type": "variable", "label": "Variable"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a", "target": "v1", "label": "Relation 1"},
            {"id": "r2", "source": "v1", "target": "t", "label": "Relation 2"},
        ],
    },
    {
        "id": "3p",
        "name": "Three Hop",
        "description": "Three-step chain: anchor -> v1 -> v2 -> target",
        "nodes": [
            {"id": "a", "type": "anchor", "label": "Anchor"},
            {"id": "v1", "type": "variable", "label": "Variable 1"},
            {"id": "v2", "type": "variable", "label": "Variable 2"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a", "target": "v1", "label": "Relation 1"},
            {"id": "r2", "source": "v1", "target": "v2", "label": "Relation 2"},
            {"id": "r3", "source": "v2", "target": "t", "label": "Relation 3"},
        ],
    },
    {
        "id": "2i",
        "name": "Two Intersection",
        "description": "Intersection of two single-hop queries sharing the same target",
        "nodes": [
            {"id": "a1", "type": "anchor", "label": "Anchor 1"},
            {"id": "a2", "type": "anchor", "label": "Anchor 2"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a1", "target": "t", "label": "Relation 1"},
            {"id": "r2", "source": "a2", "target": "t", "label": "Relation 2"},
        ],
    },
    {
        "id": "3i",
        "name": "Three Intersection",
        "description": "Intersection of three single-hop queries sharing the same target",
        "nodes": [
            {"id": "a1", "type": "anchor", "label": "Anchor 1"},
            {"id": "a2", "type": "anchor", "label": "Anchor 2"},
            {"id": "a3", "type": "anchor", "label": "Anchor 3"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a1", "target": "t", "label": "Relation 1"},
            {"id": "r2", "source": "a2", "target": "t", "label": "Relation 2"},
            {"id": "r3", "source": "a3", "target": "t", "label": "Relation 3"},
        ],
    },
    {
        "id": "ip",
        "name": "Intersection then Projection",
        "description": "Two anchors intersect, then the result projects via a third relation to the target",
        "nodes": [
            {"id": "a1", "type": "anchor", "label": "Anchor 1"},
            {"id": "a2", "type": "anchor", "label": "Anchor 2"},
            {"id": "v1", "type": "variable", "label": "Variable"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a1", "target": "v1", "label": "Relation 1"},
            {"id": "r2", "source": "a2", "target": "v1", "label": "Relation 2"},
            {"id": "r3", "source": "v1", "target": "t", "label": "Relation 3"},
        ],
    },
    {
        "id": "pi",
        "name": "Projection then Intersection",
        "description": "One anchor projects then intersects with a direct connection from a second anchor",
        "nodes": [
            {"id": "a1", "type": "anchor", "label": "Anchor 1"},
            {"id": "v1", "type": "variable", "label": "Variable"},
            {"id": "a2", "type": "anchor", "label": "Anchor 2"},
            {"id": "t", "type": "target", "label": "?"},
        ],
        "edges": [
            {"id": "r1", "source": "a1", "target": "v1", "label": "Relation 1"},
            {"id": "r2", "source": "v1", "target": "t", "label": "Relation 2"},
            {"id": "r3", "source": "a2", "target": "t", "label": "Relation 3"},
        ],
    },
]
|
|
# Datasets available for graph generation, keyed by a short dataset key.
# Each value records a display "name", a coarse "type" ("molecular" or
# "synthetic"), a "description", the node and edge type vocabularies, and
# "max_nodes". Both edge vocabularies start with "none".
# NOTE(review): "none" presumably denotes the absence of an edge - confirm
# against the consumer of "edge_types".
GRAPHGEN_DATASETS = {
    "qm9": {
        "name": "QM9",
        "type": "molecular",
        "description": "Small organic molecules with up to 9 heavy atoms (C, N, O, F)",
        "node_types": ["C", "N", "O", "F"],
        "edge_types": ["none", "single", "double", "triple", "aromatic"],
        "max_nodes": 9,
    },
    "comm20": {
        "name": "Community20",
        "type": "synthetic",
        "description": "Synthetic community-structured graphs with 12-20 nodes",
        "node_types": ["node"],
        "edge_types": ["none", "edge"],
        "max_nodes": 20,
    },
}
|
|
# Sampling modes offered for graph generation. Each mode has an "id", a
# display "name", a "description", and a list of tunable "parameters"; every
# parameter declares its "name", "type" ("integer" or "float"), a
# "description", a "default", and inclusive-looking "min"/"max" bounds.
# The t_prime <= t constraint of the "multiprox" mode is stated only in the
# parameter description text; the min/max bounds alone do not encode it.
GRAPHGEN_SAMPLING_MODES = [
    {
        "id": "standard",
        "name": "Standard Denoising",
        "description": "Iterative denoising from T to 0. Full quality, slower.",
        "parameters": [
            {
                "name": "diffusion_steps",
                "type": "integer",
                "description": "Number of diffusion steps T",
                "default": 500,
                "min": 50,
                "max": 1000,
            },
            {
                "name": "chain_frames",
                "type": "integer",
                "description": "Number of denoising snapshots in the GIF",
                "default": 20,
                "min": 10,
                "max": 30,
            },
        ],
    },
    {
        "id": "multiprox",
        "name": "MultiProx Sampling",
        "description": (
            "Multi-measurement Gibbs sampling with proximal steps. "
            "Step-by-step generation with controllable noise levels."
        ),
        "parameters": [
            {
                "name": "diffusion_steps",
                "type": "integer",
                "description": "Number of diffusion steps T",
                "default": 500,
                "min": 50,
                "max": 1000,
            },
            {
                "name": "m",
                "type": "integer",
                "description": "Number of parallel samples per multi-measurement step",
                "default": 10,
                "min": 2,
                "max": 100,
            },
            {
                "name": "t",
                "type": "float",
                "description": "First noise level (normalized, 0-1)",
                "default": 0.5,
                "min": 0.0,
                "max": 1.0,
            },
            {
                "name": "t_prime",
                "type": "float",
                "description": "Second noise level (normalized, 0-1). Must satisfy t_prime <= t.",
                "default": 0.1,
                "min": 0.0,
                "max": 1.0,
            },
        ],
    },
]
|
|
| |
|
|
# Maps each query-structure id to a second naming scheme. Five ids map to
# themselves; only "ip" -> "2i1p" and "pi" -> "1p2i" differ.
# NOTE(review): the target names are presumably the identifiers used by the
# backend query code - confirm where this mapping is consumed.
QUERY_STRUCTURE_INTERNAL = {
    "1p": "1p",
    "2p": "2p",
    "3p": "3p",
    "2i": "2i",
    "3i": "3i",
    "ip": "2i1p",
    "pi": "1p2i",
}
|
|
| |
| |
| |
# For each query-structure id, maps node ids ("a", "a1", "v1", ...) and edge
# ids ("r1", "r2", ...) to integer indices. The target node "t" never gets an
# entry, and within each structure the i-th node index equals the i-th edge
# index.
# NOTE(review): the indices are presumably positions in the backend's query
# tree representation (gaps such as 0, 2, 4 suggest interleaved slots) -
# confirm against the code that consumes this mapping.
QUERY_TREE_MAPPINGS = {
    "1p": {
        "nodes": {"a": 0},
        "edges": {"r1": 0},
    },
    "2p": {
        "nodes": {"a": 0, "v1": 1},
        "edges": {"r1": 0, "r2": 1},
    },
    "3p": {
        "nodes": {"a": 0, "v1": 1, "v2": 2},
        "edges": {"r1": 0, "r2": 1, "r3": 2},
    },
    "2i": {
        "nodes": {"a1": 0, "a2": 2},
        "edges": {"r1": 0, "r2": 2},
    },
    "3i": {
        "nodes": {"a1": 0, "a2": 2, "a3": 4},
        "edges": {"r1": 0, "r2": 2, "r3": 4},
    },
    "ip": {
        "nodes": {"a1": 0, "a2": 2, "v1": 4},
        "edges": {"r1": 0, "r2": 2, "r3": 4},
    },
    "pi": {
        "nodes": {"a1": 0, "v1": 1, "a2": 3},
        "edges": {"r1": 0, "r2": 1, "r3": 3},
    },
}
|
|
# Maps each COINs algorithm key to a string suffix. "transe" maps to the
# empty string, and "kbgat" maps to "_gnn" rather than "_kbgat"; the other
# suffixes are the algorithm key prefixed with an underscore.
# NOTE(review): presumably appended to a config file/name stem when loading
# a model configuration - confirm against the caller.
COINS_CONFIG_SUFFIX = {
    "transe": "",
    "distmult": "_distmult",
    "complex": "_complex",
    "rotate": "_rotate",
    "q2b": "_q2b",
    "kbgat": "_gnn",
}
|
|
|
|
# Metadata for the KG anomaly-correction datasets, keyed by a short dataset
# key. Each value holds only a display "name" and a "description"; unlike
# COINS_DATASET_META-style entries there is no "data_dir" field here.
KG_ANOMALY_DATASET_META = {
    "freebase": {
        "name": "FB15k-237",
        "description": "Diffusion model trained on Freebase subgraphs",
    },
    "wordnet": {
        "name": "WN18RR",
        "description": "Diffusion model trained on WordNet subgraphs",
    },
    "nell": {
        "name": "NELL-995",
        "description": "Diffusion model trained on NELL subgraphs",
    },
}
|
|