website / src /backend /api /services /constants.py
Andrej Janchevski
Implement COINs predict endpoint
3ad32ba
# Catalogue of the methods described by this module. Every entry is a dict with
# the same four string fields, in this order: "id", "name", "thesis_section"
# and "description".
METHODS = [
    dict(zip(("id", "name", "thesis_section", "description"), fields))
    for fields in (
        (
            "coins",
            "COINs - Knowledge Graph Reasoning",
            "3.1",
            (
                "Community-Informed Graph Embeddings (COINs) for scalable knowledge graph link prediction "
                "and complex query answering. Uses community detection to localize embedding computation, "
                "achieving significant speedups over full-graph methods."
            ),
        ),
        (
            "multiproxan",
            "MultiProxAn - Graph Generation",
            "4.3",
            (
                "Discrete denoising diffusion model for graph generation with MultiProx sampling. "
                "Generates molecular graphs (QM9) and synthetic community graphs using iterative "
                "multi-measurement Gibbs sampling for improved sample quality."
            ),
        ),
        (
            "kg_anomaly",
            "KG Anomaly Correction",
            "4.4",
            (
                "Diffusion-based knowledge graph subgraph correction. Applies the DiGress denoising "
                "diffusion model to knowledge graph subgraphs to detect and correct anomalous edges."
            ),
        ),
    )
]
# Metadata for the COINs knowledge-graph datasets, keyed by a short dataset key.
# Each value holds a display "name", a one-line "description" and a "data_dir"
# (presumably the dataset's folder name on disk - confirm against the loader).
COINS_DATASET_META = dict(
    freebase=dict(
        name="FB15k-237",
        description="Subset of Freebase knowledge base with 237 relation types",
        data_dir="FB15k-237",
    ),
    wordnet=dict(
        name="WN18RR",
        description="Subset of WordNet lexical database with 11 relation types",
        data_dir="WN18RR",
    ),
    nell=dict(
        name="NELL-995",
        description="Never-Ending Language Learner knowledge base with 200 relation types",
        data_dir="NELL-995",
    ),
)
# Embedding models listed for COINs. Each row below is
# (algorithm key, display name, description, supported query structure IDs).
# Only "q2b" lists more than the single-hop "1p" structure.
COINS_MODELS = [
    {
        "algorithm": algorithm,
        "name": display_name,
        "description": summary,
        # list(...) gives every entry its own list object, as separate literals would.
        "supported_query_structures": list(structures),
    }
    for algorithm, display_name, summary, structures in (
        ("transe", "TransE", "Translation-based embedding model", ("1p",)),
        ("distmult", "DistMult", "Bilinear diagonal embedding model", ("1p",)),
        ("complex", "ComplEx", "Complex-valued embedding model", ("1p",)),
        ("rotate", "RotatE", "Rotation-based embedding model in complex space", ("1p",)),
        (
            "q2b",
            "Query2Box",
            "Box embedding model for complex logical queries",
            ("1p", "2p", "3p", "2i", "3i", "ip", "pi"),
        ),
        ("kbgat", "KBGAT", "Knowledge base graph attention network", ("1p",)),
    )
]
# Query graph templates, one per query structure ID. Each row below is
#   (id, name, description, nodes, edges)
# where every node is (id, type, label) and every edge is
# (id, source node id, target node id, label). Node types used here are
# "anchor", "variable" and "target"; the target node is always "t" with
# the label "?".
QUERY_STRUCTURES = [
    {
        "id": structure_id,
        "name": title,
        "description": summary,
        "nodes": [
            {"id": node_id, "type": node_type, "label": node_label}
            for node_id, node_type, node_label in node_rows
        ],
        "edges": [
            {"id": edge_id, "source": source_id, "target": target_id, "label": edge_label}
            for edge_id, source_id, target_id, edge_label in edge_rows
        ],
    }
    for structure_id, title, summary, node_rows, edge_rows in (
        (
            "1p",
            "Single Hop",
            "Direct link prediction: who/what is connected to the anchor via this relation?",
            (
                ("a", "anchor", "Anchor"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a", "t", "Relation"),
            ),
        ),
        (
            "2p",
            "Two Hop",
            "Two-step chain: anchor -> variable -> target",
            (
                ("a", "anchor", "Anchor"),
                ("v1", "variable", "Variable"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a", "v1", "Relation 1"),
                ("r2", "v1", "t", "Relation 2"),
            ),
        ),
        (
            "3p",
            "Three Hop",
            "Three-step chain: anchor -> v1 -> v2 -> target",
            (
                ("a", "anchor", "Anchor"),
                ("v1", "variable", "Variable 1"),
                ("v2", "variable", "Variable 2"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a", "v1", "Relation 1"),
                ("r2", "v1", "v2", "Relation 2"),
                ("r3", "v2", "t", "Relation 3"),
            ),
        ),
        (
            "2i",
            "Two Intersection",
            "Intersection of two single-hop queries sharing the same target",
            (
                ("a1", "anchor", "Anchor 1"),
                ("a2", "anchor", "Anchor 2"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a1", "t", "Relation 1"),
                ("r2", "a2", "t", "Relation 2"),
            ),
        ),
        (
            "3i",
            "Three Intersection",
            "Intersection of three single-hop queries sharing the same target",
            (
                ("a1", "anchor", "Anchor 1"),
                ("a2", "anchor", "Anchor 2"),
                ("a3", "anchor", "Anchor 3"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a1", "t", "Relation 1"),
                ("r2", "a2", "t", "Relation 2"),
                ("r3", "a3", "t", "Relation 3"),
            ),
        ),
        (
            "ip",
            "Intersection then Projection",
            "Two anchors intersect, then the result projects via a third relation to the target",
            (
                ("a1", "anchor", "Anchor 1"),
                ("a2", "anchor", "Anchor 2"),
                ("v1", "variable", "Variable"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a1", "v1", "Relation 1"),
                ("r2", "a2", "v1", "Relation 2"),
                ("r3", "v1", "t", "Relation 3"),
            ),
        ),
        (
            "pi",
            "Projection then Intersection",
            "One anchor projects then intersects with a direct connection from a second anchor",
            (
                # Node order is a1, v1, a2, t - the variable precedes the second anchor.
                ("a1", "anchor", "Anchor 1"),
                ("v1", "variable", "Variable"),
                ("a2", "anchor", "Anchor 2"),
                ("t", "target", "?"),
            ),
            (
                ("r1", "a1", "v1", "Relation 1"),
                ("r2", "v1", "t", "Relation 2"),
                ("r3", "a2", "t", "Relation 3"),
            ),
        ),
    )
]
# Datasets listed for graph generation, keyed by dataset key. Each value
# records a display name, a dataset "type", a description, the node and edge
# type vocabularies (both edge lists start with "none") and the maximum node
# count.
GRAPHGEN_DATASETS = dict(
    qm9=dict(
        name="QM9",
        type="molecular",
        description="Small organic molecules with up to 9 heavy atoms (C, N, O, F)",
        node_types=["C", "N", "O", "F"],
        edge_types=["none", "single", "double", "triple", "aromatic"],
        max_nodes=9,
    ),
    comm20=dict(
        name="Community20",
        type="synthetic",
        description="Synthetic community-structured graphs with 12-20 nodes",
        node_types=["node"],
        edge_types=["none", "edge"],
        max_nodes=20,
    ),
)
# Sampling modes listed for graph generation. Each row below is
#   (id, name, description, parameter specs)
# and each parameter spec is (name, type, description, default, min, max).
# Bounds of "float" parameters are written as float literals on purpose so the
# stored values keep their float type.
GRAPHGEN_SAMPLING_MODES = [
    {
        "id": mode_id,
        "name": mode_name,
        "description": mode_summary,
        "parameters": [
            dict(zip(("name", "type", "description", "default", "min", "max"), spec))
            for spec in parameter_specs
        ],
    }
    for mode_id, mode_name, mode_summary, parameter_specs in (
        (
            "standard",
            "Standard Denoising",
            "Iterative denoising from T to 0. Full quality, slower.",
            (
                ("diffusion_steps", "integer", "Number of diffusion steps T", 500, 50, 1000),
                ("chain_frames", "integer", "Number of denoising snapshots in the GIF", 20, 10, 30),
            ),
        ),
        (
            "multiprox",
            "MultiProx Sampling",
            (
                "Multi-measurement Gibbs sampling with proximal steps. "
                "Step-by-step generation with controllable noise levels."
            ),
            (
                ("diffusion_steps", "integer", "Number of diffusion steps T", 500, 50, 1000),
                ("m", "integer", "Number of parallel samples per multi-measurement step", 10, 2, 100),
                ("t", "float", "First noise level (normalized, 0-1)", 0.5, 0.0, 1.0),
                (
                    "t_prime",
                    "float",
                    "Second noise level (normalized, 0-1). Must satisfy t_prime <= t.",
                    0.1,
                    0.0,
                    1.0,
                ),
            ),
        ),
    )
]
# --- COINs predict helpers ---
# Translates a query structure ID into its "internal" name (presumably the
# name the COINs code uses - confirm at the call site). The path and
# intersection structures map to themselves; only "ip" and "pi" are renamed.
QUERY_STRUCTURE_INTERNAL = {
    **{structure_id: structure_id for structure_id in ("1p", "2p", "3p", "2i", "3i")},
    "ip": "2i1p",
    "pi": "1p2i",
}
# Per query structure, maps API node IDs and edge IDs to query tree indices:
#   "nodes": anchor/variable node ID -> tree vertex index
#   "edges": edge ID -> tree edge index
# The target node "t" has no entry in any "nodes" map.
# Key insight (as stated by the original author): edge_index ==
# target_node_index in all COINs query trees.
# NOTE(review): in this table every edge index equals the vertex index of the
# node that QUERY_STRUCTURES lists as that edge's source - presumably "target"
# above refers to the tree's own edge orientation; confirm against the query
# tree construction code.
QUERY_TREE_MAPPINGS = {
    "1p": dict(nodes=dict(a=0), edges=dict(r1=0)),
    "2p": dict(nodes=dict(a=0, v1=1), edges=dict(r1=0, r2=1)),
    "3p": dict(nodes=dict(a=0, v1=1, v2=2), edges=dict(r1=0, r2=1, r3=2)),
    "2i": dict(nodes=dict(a1=0, a2=2), edges=dict(r1=0, r2=2)),
    "3i": dict(nodes=dict(a1=0, a2=2, a3=4), edges=dict(r1=0, r2=2, r3=4)),
    "ip": dict(nodes=dict(a1=0, a2=2, v1=4), edges=dict(r1=0, r2=2, r3=4)),
    "pi": dict(nodes=dict(a1=0, v1=1, a2=3), edges=dict(r1=0, r2=1, r3=3)),
}
# Suffix string per COINs algorithm key (presumably appended to a config name -
# confirm at the call site). Two entries break the "_<algorithm>" pattern:
# "transe" has an empty suffix and "kbgat" uses "_gnn".
COINS_CONFIG_SUFFIX = dict(
    transe="",
    distmult="_distmult",
    complex="_complex",
    rotate="_rotate",
    q2b="_q2b",
    kbgat="_gnn",
)
# Metadata for the KG anomaly datasets, keyed by dataset key. Each row below is
# (dataset key, display name, description). The keys and display names are the
# same as those in COINS_DATASET_META.
KG_ANOMALY_DATASET_META = {
    dataset_key: {"name": display_name, "description": summary}
    for dataset_key, display_name, summary in (
        ("freebase", "FB15k-237", "Diffusion model trained on Freebase subgraphs"),
        ("wordnet", "WN18RR", "Diffusion model trained on WordNet subgraphs"),
        ("nell", "NELL-995", "Diffusion model trained on NELL subgraphs"),
    )
}