# OpenAPI document version; the API's own release number is `info.version`.
openapi: 3.0.3
info:
  title: Scalable Graph ML Research API
  description: |
    REST API for interactively testing PhD research methods from
    "Scalable Methods for Knowledge Graph Reasoning and Generation"
    (Andrej Janchevski, EPFL, 2025).

    Three research methods are exposed:
    - **COINs**: Community-Informed Graph Embeddings for KG link prediction and query answering
    - **MultiProxAn**: Discrete denoising diffusion for graph generation with MultiProx sampling
    - **KG Anomaly Correction**: Diffusion-based knowledge graph subgraph correction

    The API is stateless (no database). Model checkpoints are loaded in memory at startup.
    A single global inference lock ensures only one inference runs at a time; requests
    that arrive while it is held are rejected with `429 INFERENCE_BUSY` rather than queued.
  version: 1.0.0
  contact:
    name: Andrej Janchevski
    url: https://bani57.pythonanywhere.com

# Base URL that every path in this document is resolved against.
servers:
  - description: API v1
    url: /api/v1

# One tag per research method, plus `health` for discovery/diagnostic endpoints.
tags:
  - name: health
    description: 'Server health and method discovery'
  - name: coins
    description: 'COINs - Knowledge Graph Reasoning'
  - name: graph-generation
    description: 'MultiProxAn - Graph Generation'
  - name: kg-anomaly
    description: 'KG Anomaly Correction'

paths:
  # -- Health ----------------------------------------------------------
  # Health probe: server status plus per-method model load flags.
  /health:
    get:
      operationId: getHealth
      tags:
        - health
      summary: Server health check
      description: Returns server status and which model groups are loaded.
      responses:
        '200':
          description: Server is healthy
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HealthResponse'

  # Discovery endpoint listing the research methods served by this API.
  /methods:
    get:
      operationId: getMethods
      tags:
        - health
      summary: List research methods
      description: Returns the three research methods available on this API.
      responses:
        '200':
          description: List of methods
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MethodsResponse'

  # Debug-only escape hatch for a stuck global inference lock.
  # NOTE: the 403 body is a flat `{error: string}` object, not the shared ErrorResponse envelope.
  /debug/force-unlock:
    post:
      operationId: forceUnlockInferenceLock
      tags:
        - health
      summary: Release a stuck inference lock (debug only)
      description: |
        Forcibly releases the global inference lock. Only available when
        the server is running with `DJANGO_DEBUG=True`; returns `403` in
        production. Use when a crashed request left the lock held and
        subsequent requests are returning `429 INFERENCE_BUSY`.
      responses:
        '200':
          description: Lock release result
          content:
            application/json:
              schema:
                type: object
                required:
                  - released
                properties:
                  released:
                    type: boolean
                    description: True if a held lock was released; false if the lock was already free.
                    example: true
        '403':
          description: Not available outside debug mode
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                    example: only available in debug mode

  # -- COINs -----------------------------------------------------------
  # Catalogue of knowledge graph datasets usable with the COINs endpoints.
  /coins/datasets:
    get:
      operationId: getCoinsDatasets
      tags:
        - coins
      summary: List KG datasets
      description: Returns available knowledge graph datasets with entity and relation counts.
      responses:
        '200':
          description: Dataset list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsDatasetsResponse'

  # Paginated entity lookup for one dataset; `q` applies a substring filter.
  /coins/datasets/{dataset_id}/entities:
    get:
      operationId: getCoinsEntities
      tags:
        - coins
      summary: List entities in a dataset
      description: Paginated, searchable list of entities.
      parameters:
        - $ref: '#/components/parameters/CoinsDatasetId'
        - $ref: '#/components/parameters/SearchQuery'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/PageSize'
      responses:
        '200':
          description: Paginated entity list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsEntitiesResponse'
        '404':
          $ref: '#/components/responses/NotFound'

  # Paginated relation lookup for one dataset; same paging/search parameters as entities.
  /coins/datasets/{dataset_id}/relations:
    get:
      operationId: getCoinsRelations
      tags:
        - coins
      summary: List relations in a dataset
      description: Paginated, searchable list of relations.
      parameters:
        - $ref: '#/components/parameters/CoinsDatasetId'
        - $ref: '#/components/parameters/SearchQuery'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/PageSize'
      responses:
        '200':
          description: Paginated relation list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsRelationsResponse'
        '404':
          $ref: '#/components/responses/NotFound'

  # Random (optionally seeded) triples from a dataset, intended for example queries.
  /coins/datasets/{dataset_id}/sample-triples:
    get:
      operationId: getCoinsSampleTriples
      tags:
        - coins
      summary: Random sample triples
      description: Returns random triples from the dataset, useful for populating example queries.
      parameters:
        - $ref: '#/components/parameters/CoinsDatasetId'
        # Bounded to 1..50; defaults to 10 when omitted.
        - name: count
          in: query
          schema:
            type: integer
            minimum: 1
            maximum: 50
            default: 10
          description: Number of random triples to return
        # The seed is a free-form string (not an integer), e.g. an ISO date.
        - name: seed
          in: query
          required: false
          schema:
            type: string
          description: |
            Optional sampling seed. When provided, sampling is deterministic —
            the same `(dataset_id, count, seed)` always yields the same triples.
            Useful for day-stable "fact of the day" widgets (e.g. seed by the
            ISO date). When omitted, sampling is fully random.
          example: '2026-04-15'
      responses:
        '200':
          description: Sample triples
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsSampleTriplesResponse'
        '404':
          $ref: '#/components/responses/NotFound'

  # Samples structurally valid queries (anchors, relations, known target) for a given structure.
  /coins/datasets/{dataset_id}/sample-query:
    get:
      operationId: getCoinsSampleQuery
      tags:
        - coins
      summary: Sample a valid KG query
      description: |
        Walks the training knowledge graph to produce a structurally valid query
        for the given query structure. Each returned query contains anchor entities,
        relations, and a known target entity — guaranteed to form a real path or
        intersection in the KG. This is the recommended way to populate the query
        builder UI with meaningful defaults. For simple 1p (single-hop) queries the
        result is equivalent to a random triple; for multi-hop and intersection
        structures (2p, 3p, 2i, 3i, ip, pi) the sampler walks backward from a
        random target through actual KG edges.
      parameters:
        - $ref: '#/components/parameters/CoinsDatasetId'
        # The only mandatory query parameter of this operation.
        - name: query_structure
          in: query
          required: true
          schema:
            $ref: '#/components/schemas/CoinsQueryStructureEnum'
          description: The query structure to sample for.
        # Bounded to 1..10; defaults to a single query.
        - name: count
          in: query
          schema:
            type: integer
            minimum: 1
            maximum: 10
            default: 1
          description: Number of sample queries to return.
        - name: seed
          in: query
          required: false
          schema:
            type: string
          description: |
            Optional sampling seed for deterministic results. Same
            `(dataset_id, query_structure, count, seed)` always yields the
            same queries. Useful for day-stable prefills (e.g. seed by ISO date).
          example: '2026-04-17'
      responses:
        '200':
          description: Sampled queries
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsSampleQueryResponse'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '404':
          $ref: '#/components/responses/NotFound'

  # Lists the embedding algorithms (see CoinsAlgorithmEnum) and what each one supports.
  /coins/models:
    get:
      operationId: getCoinsModels
      tags: [coins]
      summary: List embedding algorithms
      # The "1p only" rule is stated once and covers every algorithm except Q2B.
      description: |
        Returns available embedding algorithms with their supported query structures
        and datasets. Q2B supports all query structures (1p, 2p, 3p, 2i, 3i, ip, pi).
        TransE, DistMult, ComplEx, RotatE, and KBGAT support 1p only.
      responses:
        "200":
          description: Algorithm list
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CoinsModelsResponse"

  # Graph templates whose node/edge IDs are the keys expected by /coins/predict.
  /coins/query-structures:
    get:
      operationId: getCoinsQueryStructures
      tags:
        - coins
      summary: Query structure graph templates
      description: |
        Returns a graph template per query structure. Each template defines nodes
        (anchor, variable, target) and edges that the frontend renders as a small
        interactive graph with entity dropdowns on anchor nodes and relation
        dropdowns on edges.
      responses:
        '200':
          description: Query structure templates
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsQueryStructuresResponse'

  # Inference endpoint: ranked entity predictions for the target node of a filled-in query graph.
  /coins/predict:
    post:
      operationId: coinsPredict
      tags:
        - coins
      summary: Run link prediction / query answering
      description: |
        Submit a filled-in query graph and get ranked entity predictions for the
        target node. The `anchors` and `relations` keys must match the node and
        edge IDs from the query structure template.

        The response includes per-step timing (Step 1: community detection +
        localized embedding, Step 2: link prediction) and a simulated baseline
        estimate showing the COINs speedup.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CoinsPredictRequest'
            # Example anchor/relation keys follow the template naming: a / a1, a2 and r1..r3.
            examples:
              single_hop:
                summary: '1p - single hop'
                value:
                  dataset_id: wordnet
                  algorithm: rotate
                  query_structure: '1p'
                  anchors:
                    a: 11754
                  relations:
                    r1: 3
                  top_k: 10
              two_intersection:
                summary: '2i - two-way intersection'
                value:
                  dataset_id: wordnet
                  algorithm: q2b
                  query_structure: '2i'
                  anchors:
                    a1: 11754
                    a2: 5142
                  relations:
                    r1: 3
                    r2: 1
                  top_k: 10
              intersection_projection:
                summary: 'ip - intersection then projection'
                value:
                  dataset_id: wordnet
                  algorithm: q2b
                  query_structure: 'ip'
                  anchors:
                    a1: 11754
                    a2: 5142
                  relations:
                    r1: 3
                    r2: 1
                    r3: 2
                  top_k: 10
      responses:
        '200':
          description: Ranked predictions
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CoinsPredictResponse'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '404':
          $ref: '#/components/responses/NotFound'
        '422':
          $ref: '#/components/responses/InferenceError'
        '429':
          $ref: '#/components/responses/InferenceBusy'
        '503':
          $ref: '#/components/responses/ModelUnavailable'

  # -- Graph Generation ------------------------------------------------
  # Catalogue of graph types available for generation.
  /graph-generation/datasets:
    get:
      operationId: getGraphGenDatasets
      tags:
        - graph-generation
      summary: List graph generation datasets
      description: Returns available graph types with node/edge type info and model availability.
      responses:
        '200':
          description: Dataset list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GraphGenDatasetsResponse'

  # Describes the sampling strategies and the parameters each one accepts.
  /graph-generation/sampling-modes:
    get:
      operationId: getGraphGenSamplingModes
      tags:
        - graph-generation
      summary: List sampling strategies
      description: Returns available sampling modes (standard, multiprox) with parameter specs.
      responses:
        '200':
          description: Sampling mode list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GraphGenSamplingModesResponse'

  # Starts a generation run; the 200 response is an SSE stream, not a JSON document.
  /graph-generation/generate:
    post:
      operationId: graphGenGenerate
      tags:
        - graph-generation
      summary: Generate a graph (SSE streaming)
      description: |
        Server-Sent Events stream (`text/event-stream`). Emits `progress`
        events during diffusion, optional `preview` events with intermediate
        PNGs, and a terminal `result` event whose `data` payload is the JSON
        described below.

        **Standard mode**: runs full diffusion (T->0); terminal `result`
        payload conforms to `GraphGenStandardResponse` (animated GIF + final PNG).

        **MultiProx mode**: runs the first Gibbs iteration; terminal `result`
        payload conforms to `GraphGenMultiProxResponse` and includes an opaque
        `state` blob to be passed to `/graph-generation/continue`.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GraphGenGenerateRequest'
            # One example per sampling mode; only the multiprox one carries `multiprox_params`.
            examples:
              standard_qm9:
                summary: Standard QM9 molecule generation
                value:
                  dataset_id: qm9
                  model_type: discrete
                  sampling_mode: standard
                  num_nodes: null
                  diffusion_steps: 500
                  chain_frames: 20
              multiprox_comm20:
                summary: MultiProx Community20 generation
                value:
                  dataset_id: comm20
                  model_type: discrete
                  sampling_mode: multiprox
                  num_nodes: null
                  diffusion_steps: 500
                  multiprox_params:
                    m: 10
                    t: 0.5
                    t_prime: 0.1
      responses:
        '200':
          description: SSE stream of progress/preview events terminated by a result event
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GraphGenSseStream'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '429':
          $ref: '#/components/responses/InferenceBusy'
        '503':
          $ref: '#/components/responses/ModelUnavailable'

  # Runs one more MultiProx iteration; the client round-trips the opaque `state` blob.
  /graph-generation/continue:
    post:
      operationId: graphGenContinue
      tags:
        - graph-generation
      summary: Advance MultiProx generation by one step (SSE streaming)
      description: |
        SSE stream (`text/event-stream`). Advances the MultiProx
        multi-measurement chain by one Gibbs iteration. The client must send
        back the opaque `state` from the previous step's `result` event.
        Emits `progress` and `preview` events, then a terminal `result` event
        with the `GraphGenMultiProxResponse` payload (including the updated
        `state`). The API remains fully stateless -- no server-side sessions.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GraphGenContinueRequest'
      responses:
        '200':
          description: SSE stream of progress/preview events terminated by a result event
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GraphGenSseStream'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '429':
          $ref: '#/components/responses/InferenceBusy'

  # -- KG Anomaly Correction ------------------------------------------
  # Catalogue of datasets that have a correction model.
  /kg-anomaly/datasets:
    get:
      operationId: getKgAnomalyDatasets
      tags:
        - kg-anomaly
      summary: List KG anomaly correction datasets
      description: Returns datasets for which correction models are available.
      responses:
        '200':
          description: Dataset list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/KgAnomalyDatasetsResponse'

  # Example test-set subgraphs, optionally pre-corrupted for use with /kg-anomaly/correct.
  /kg-anomaly/datasets/{dataset_id}/sample-subgraphs:
    get:
      operationId: getKgAnomalySampleSubgraphs
      tags:
        - kg-anomaly
      summary: Get example subgraphs for correction
      description: |
        Returns pre-computed example subgraphs from the test set. When
        `noise_level` is supplied, the model's forward diffusion is applied
        to each subgraph's edges so the caller receives a corrupted input
        ready for `/kg-anomaly/correct`. For `task=correct` only the edges
        inside the inpaint mask (second half of nodes) are noised; for
        `task=generate` every edge is noised.
      parameters:
        - $ref: '#/components/parameters/KgAnomalyDatasetId'
        # Bounded to 1..10; defaults to 5.
        - name: count
          in: query
          schema:
            type: integer
            minimum: 1
            maximum: 10
            default: 5
          description: Number of sample subgraphs to return
        # Valid range is (0, 1]: OpenAPI 3.0 uses a boolean `exclusiveMinimum` alongside `minimum`.
        - name: noise_level
          in: query
          required: false
          schema:
            type: number
            minimum: 0.0
            exclusiveMinimum: true
            maximum: 1.0
          description: |
            Fraction of the full diffusion horizon T at which to sample
            noised edges (e.g. 0.4 for moderate corruption). Omit to receive
            the clean subgraphs.
        - name: task
          in: query
          required: false
          schema:
            type: string
            enum:
              - correct
              - generate
            default: correct
          description: Task the noise should align with. Ignored if noise_level is not set.
        # Unlike the COINs sampling endpoints, this seed is an integer.
        - name: seed
          in: query
          required: false
          schema:
            type: integer
          description: Optional RNG seed for reproducible noise.
      responses:
        '200':
          description: Sample subgraphs
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/KgAnomalySampleSubgraphsResponse'
        '404':
          $ref: '#/components/responses/NotFound'

  # Starts a correction run; the 200 response is an SSE stream, not a JSON document.
  /kg-anomaly/correct:
    post:
      operationId: kgAnomalyCorrect
      tags:
        - kg-anomaly
      summary: Correct a KG subgraph (SSE streaming)
      description: |
        Server-Sent Events stream (`text/event-stream`). Emits `progress`
        events during diffusion, optional `preview` events with intermediate
        PNGs, and a terminal `result` event whose `data` payload is the JSON
        described below.

        **Standard mode**: runs full diffusion correction; terminal `result`
        payload conforms to `KgAnomalyStandardResponse`.

        **MultiProx mode**: runs the first Gibbs iteration; terminal `result`
        payload conforms to `KgAnomalyMultiProxResponse` and includes an
        opaque `state` blob to be passed to `/kg-anomaly/continue`.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/KgAnomalyCorrectRequest'
            # In both examples the edges address nodes by position (`source_idx` / `target_idx`).
            examples:
              standard:
                summary: Standard correction
                value:
                  dataset_id: wordnet
                  sampling_mode: standard
                  subgraph:
                    nodes:
                      - entity_id: 11754
                        type_id: 3
                      - entity_id: 5142
                        type_id: 3
                      - entity_id: 8142
                        type_id: 3
                    edges:
                      - source_idx: 0
                        target_idx: 1
                        relation_id: 3
                      - source_idx: 1
                        target_idx: 2
                        relation_id: 1
                  chain_frames: 20
              multiprox:
                summary: MultiProx correction (future)
                value:
                  dataset_id: wordnet
                  sampling_mode: multiprox
                  subgraph:
                    nodes:
                      - entity_id: 11754
                        type_id: 3
                      - entity_id: 5142
                        type_id: 3
                    edges:
                      - source_idx: 0
                        target_idx: 1
                        relation_id: 3
                  multiprox_params:
                    m: 10
                    t: 0.5
                    t_prime: 0.1
      responses:
        '200':
          description: SSE stream of progress/preview events terminated by a result event
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/KgAnomalySseStream'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '404':
          $ref: '#/components/responses/NotFound'
        '422':
          $ref: '#/components/responses/InferenceError'
        '429':
          $ref: '#/components/responses/InferenceBusy'
        '503':
          $ref: '#/components/responses/ModelUnavailable'

  # Runs one more MultiProx correction iteration; the client round-trips the opaque `state` blob.
  /kg-anomaly/continue:
    post:
      operationId: kgAnomalyContinue
      tags:
        - kg-anomaly
      summary: Advance MultiProx correction by one step (SSE streaming)
      description: |
        SSE stream (`text/event-stream`). Advances the MultiProx correction
        chain by one Gibbs iteration. The client must send back the opaque
        `state` from the previous step's `result` event. Emits `progress`
        and `preview` events, then a terminal `result` event with the
        `KgAnomalyMultiProxResponse` payload (including the updated `state`).
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/KgAnomalyContinueRequest'
      responses:
        '200':
          description: SSE stream of progress/preview events terminated by a result event
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/KgAnomalySseStream'
        '400':
          $ref: '#/components/responses/InvalidRequest'
        '429':
          $ref: '#/components/responses/InferenceBusy'

# ========================================================================
components:
  # -- Parameters ----------------------------------------------------
  # Reusable parameters, pulled into operations through `$ref`.
  parameters:
    # Path parameter restricted to the COINs dataset enum.
    CoinsDatasetId:
      name: dataset_id
      in: path
      required: true
      schema:
        $ref: '#/components/schemas/CoinsDatasetIdEnum'
      description: Knowledge graph dataset identifier

    # Path parameter restricted to the KG anomaly dataset enum.
    KgAnomalyDatasetId:
      name: dataset_id
      in: path
      required: true
      schema:
        $ref: '#/components/schemas/KgAnomalyDatasetIdEnum'
      description: KG anomaly correction dataset identifier

    # Optional free-text filter, sent as `q`.
    SearchQuery:
      name: q
      in: query
      schema:
        type: string
      description: Substring search filter

    # Pagination starts at page 1.
    Page:
      name: page
      in: query
      schema:
        type: integer
        minimum: 1
        default: 1
      description: Page number (1-indexed)

    # Page size is capped at 200 items and defaults to 50.
    PageSize:
      name: page_size
      in: query
      schema:
        type: integer
        minimum: 1
        maximum: 200
        default: 50
      description: Items per page

  # -- Responses (errors) --------------------------------------------
  # Shared error responses; every one of them wraps its payload in ErrorResponse.
  responses:
    # 400-class: request body or parameters failed validation.
    InvalidRequest:
      description: Malformed input
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              code: INVALID_REQUEST
              message: 'Anchor keys {a1} do not match template anchors {a} for query structure 1p'
              details: {}

    NotFound:
      description: Resource not found
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              code: NOT_FOUND
              # Double quotes kept because the message itself contains single quotes.
              message: "Dataset 'unknown' not found"
              details: {}

    InferenceError:
      description: Model inference failed
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              code: INFERENCE_ERROR
              message: 'Inference failed: entity_id 99999 out of range for dataset wordnet'
              details: {}

    # Returned while another inference is already running.
    InferenceBusy:
      description: Another inference is already running
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              code: INFERENCE_BUSY
              message: 'Another inference is in progress. Please wait and try again.'
              details: {}

    ModelUnavailable:
      description: Model checkpoint not loaded
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error:
              code: MODEL_UNAVAILABLE
              message: 'Model for dataset wordnet with algorithm rotate is not loaded'
              details: {}

  # -- Schemas -------------------------------------------------------
  schemas:
    # -- Enums --
    # Dataset identifiers accepted by the COINs endpoints.
    CoinsDatasetIdEnum:
      type: string
      enum:
        - freebase
        - wordnet
        - nell

    # Embedding algorithm identifiers.
    CoinsAlgorithmEnum:
      type: string
      enum:
        - transe
        - distmult
        - complex
        - rotate
        - q2b
        - kbgat

    # Query structure identifiers; all quoted so the digit-leading ones stay strings.
    CoinsQueryStructureEnum:
      type: string
      enum:
        - '1p'
        - '2p'
        - '3p'
        - '2i'
        - '3i'
        - 'ip'
        - 'pi'

    GraphGenDatasetIdEnum:
      type: string
      enum:
        - qm9
        - comm20

    GraphGenModelTypeEnum:
      type: string
      enum:
        - discrete
        - continuous

    # Sampling strategies used by the generate/correct request examples.
    SamplingModeEnum:
      type: string
      enum:
        - standard
        - multiprox

    # Same values as CoinsDatasetIdEnum, declared separately for the KG anomaly endpoints.
    KgAnomalyDatasetIdEnum:
      type: string
      enum:
        - freebase
        - wordnet
        - nell

    # Node roles inside a query structure template.
    QueryNodeTypeEnum:
      type: string
      enum:
        - anchor
        - variable
        - target

    EdgeChangeStatusEnum:
      type: string
      enum:
        - unchanged
        - modified
        - added
        - removed

    # -- Error --
    # Common error envelope: `error.code` and `error.message` are mandatory, `details` is free-form.
    ErrorResponse:
      type: object
      required:
        - error
      properties:
        error:
          type: object
          required:
            - code
            - message
          properties:
            code:
              type: string
            message:
              type: string
            details:
              type: object
              additionalProperties: true

    # -- Health --
    # Body of GET /health: overall status plus one boolean load flag per method.
    HealthResponse:
      type: object
      required:
        - status
        - models_loaded
      properties:
        status:
          type: string
          example: ok
        models_loaded:
          type: object
          required:
            - coins
            - multiproxan
            - kg_anomaly
          properties:
            coins:
              type: boolean
            multiproxan:
              type: boolean
            kg_anomaly:
              type: boolean

    # Body of GET /methods: one entry per research method.
    MethodsResponse:
      type: object
      required:
        - methods
      properties:
        methods:
          type: array
          items:
            type: object
            required:
              - id
              - name
              - thesis_section
              - description
            properties:
              id:
                type: string
                example: coins
              name:
                type: string
                example: COINs - Knowledge Graph Reasoning
              # A string, not a number; the example is quoted so it is not read as a float.
              thesis_section:
                type: string
                example: '3.1'
              description:
                type: string

    # -- COINs: Discovery --
    # Body of GET /coins/datasets.
    CoinsDatasetsResponse:
      type: object
      required:
        - datasets
      properties:
        datasets:
          type: array
          items:
            $ref: '#/components/schemas/CoinsDataset'

    # One knowledge graph dataset with its size statistics; every field is required.
    CoinsDataset:
      type: object
      required:
        - id
        - name
        - num_entities
        - num_relations
        - description
      properties:
        id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        name:
          type: string
          example: WN18RR
        num_entities:
          type: integer
          example: 41105
        num_relations:
          type: integer
          example: 11
        description:
          type: string
          example: Subset of WordNet lexical database with 11 relation types

    # One page of entities, echoing the paging parameters alongside the total count.
    CoinsEntitiesResponse:
      type: object
      required:
        - dataset_id
        - total
        - page
        - page_size
        - entities
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        total:
          type: integer
        page:
          type: integer
        page_size:
          type: integer
        entities:
          type: array
          items:
            $ref: '#/components/schemas/CoinsEntity'

    # A knowledge graph entity: integer id plus its dataset-native name.
    CoinsEntity:
      type: object
      required: [id, name]
      properties:
        id:
          type: integer
          example: 0
        name:
          type: string
          example: dog's_breakfast.n.01
        # Optional: `label` is not in `required`. The CoinsSampledQuery examples also
        # carry it, so it is not exclusive to sample-triples responses.
        label:
          type: string
          description: Dataset-specific short, display-friendly form of `name`. Only present on sample-triples and sample-query responses.
          example: dog's_breakfast

    # One page of relations, echoing the paging parameters alongside the total count.
    CoinsRelationsResponse:
      type: object
      required:
        - dataset_id
        - total
        - page
        - page_size
        - relations
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        total:
          type: integer
        page:
          type: integer
        page_size:
          type: integer
        relations:
          type: array
          items:
            $ref: '#/components/schemas/CoinsRelation'

    # A knowledge graph relation type: integer id plus its dataset-native name.
    CoinsRelation:
      type: object
      required: [id, name]
      properties:
        id:
          type: integer
          example: 0
        name:
          type: string
          example: _hypernym
        # Optional: `label` is not in `required`. The CoinsSampledQuery examples also
        # carry it, so it is not exclusive to sample-triples responses.
        label:
          type: string
          description: Dataset-specific short, display-friendly form of `name`. Only present on sample-triples and sample-query responses.
          example: hypernym

    # Body of the sample-triples endpoint.
    CoinsSampleTriplesResponse:
      type: object
      required:
        - dataset_id
        - triples
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        triples:
          type: array
          items:
            $ref: '#/components/schemas/CoinsTriple'

    # A (head, relation, tail) fact; head and tail are entities.
    CoinsTriple:
      type: object
      required:
        - head
        - relation
        - tail
      properties:
        head:
          $ref: '#/components/schemas/CoinsEntity'
        relation:
          $ref: '#/components/schemas/CoinsRelation'
        tail:
          $ref: '#/components/schemas/CoinsEntity'

    # Body of the sample-query endpoint; echoes the requested dataset and structure.
    CoinsSampleQueryResponse:
      type: object
      required:
        - dataset_id
        - query_structure
        - queries
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        query_structure:
          $ref: '#/components/schemas/CoinsQueryStructureEnum'
        queries:
          type: array
          description: |
            Sampled queries. Each query is a valid walk through the training KG.
            May return fewer than `count` if the graph is too sparse for the
            requested structure (e.g. 3i requires a target with at least 3 in-edges).
          items:
            $ref: '#/components/schemas/CoinsSampledQuery'

    # One sampled query: anchors and relations are maps keyed by the template's
    # node/edge ids, and `target` is a single entity.
    CoinsSampledQuery:
      type: object
      description: |
        A structurally valid query instance. `anchors` and `relations` keys match the
        node/edge IDs from `GET /coins/query-structures` for the given structure.
        The `target` is the known answer entity.
      required:
        - anchors
        - relations
        - target
      properties:
        anchors:
          type: object
          description: 'Anchor entities keyed by node ID (e.g. `a`, `a1`, `a2`, `a3`).'
          additionalProperties:
            $ref: '#/components/schemas/CoinsEntity'
          example:
            a1:
              id: 123
              name: '/m/06thjt'
              label: '06thjt'
            a2:
              id: 456
              name: '/m/0d_kd'
              label: '0d_kd'
        relations:
          type: object
          description: 'Relations keyed by edge ID (e.g. `r1`, `r2`, `r3`).'
          additionalProperties:
            $ref: '#/components/schemas/CoinsRelation'
          example:
            r1:
              id: 7
              name: '/people/person/nationality'
              label: 'person nationality'
            r2:
              id: 12
              name: '/location/country/capital'
              label: 'country capital'
        target:
          $ref: '#/components/schemas/CoinsEntity'

    # Envelope around the list of COINs model descriptors.
    CoinsModelsResponse:
      type: object
      required:
        - models
      properties:
        models:
          type: array
          items:
            $ref: '#/components/schemas/CoinsModel'

    # Descriptor of one embedding algorithm: which query structures it supports
    # and which datasets it is available for.
    CoinsModel:
      type: object
      required:
        - algorithm
        - name
        - description
        - supported_query_structures
        - available_datasets
      properties:
        algorithm:
          $ref: '#/components/schemas/CoinsAlgorithmEnum'
        name:
          type: string
          example: TransE
        description:
          type: string
          example: Translation-based embedding model
        supported_query_structures:
          type: array
          items:
            $ref: '#/components/schemas/CoinsQueryStructureEnum'
          example:
            - '1p'
        available_datasets:
          type: array
          items:
            $ref: '#/components/schemas/CoinsDatasetIdEnum'
          example:
            - freebase
            - wordnet
            - nell

    # -- COINs: Query Structures --
    # Envelope around the list of query-structure templates.
    CoinsQueryStructuresResponse:
      type: object
      required:
        - query_structures
      properties:
        query_structures:
          type: array
          items:
            $ref: '#/components/schemas/CoinsQueryStructure'

    # A query-structure template expressed as a small graph of nodes and edges.
    CoinsQueryStructure:
      type: object
      required:
        - id
        - name
        - description
        - nodes
        - edges
      properties:
        id:
          $ref: '#/components/schemas/CoinsQueryStructureEnum'
        name:
          type: string
          example: Single Hop
        description:
          type: string
          example: 'Direct link prediction: who/what is connected to the anchor via this relation?'
        nodes:
          type: array
          items:
            $ref: '#/components/schemas/QueryNode'
        edges:
          type: array
          items:
            $ref: '#/components/schemas/QueryEdge'

    # Node of a query-structure template; its `id` is what edges and predict requests refer to.
    QueryNode:
      type: object
      required:
        - id
        - type
        - label
      properties:
        id:
          type: string
          example: a
          description: Node identifier referenced by edges and the predict request
        type:
          $ref: '#/components/schemas/QueryNodeTypeEnum'
        label:
          type: string
          example: Anchor
          description: Display label for the frontend

    # Directed edge of a query-structure template; `source` and `target` hold node ids.
    QueryEdge:
      type: object
      required:
        - id
        - source
        - target
        - label
      properties:
        id:
          type: string
          example: r1
          description: Edge identifier referenced by the predict request
        source:
          type: string
          example: a
          description: Source node id
        target:
          type: string
          example: t
          description: Target node id
        label:
          type: string
          example: Relation
          description: Display label for the frontend

    # -- COINs: Predict --
    # Prediction request body. `anchors`, `variables` and `relations` are all
    # string-keyed maps to integer ids; only `variables` and `top_k` are optional.
    CoinsPredictRequest:
      type: object
      required:
        - dataset_id
        - algorithm
        - query_structure
        - anchors
        - relations
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        algorithm:
          $ref: '#/components/schemas/CoinsAlgorithmEnum'
        query_structure:
          $ref: '#/components/schemas/CoinsQueryStructureEnum'
        anchors:
          type: object
          description: |
            Map of anchor node id -> entity id. Keys must match all anchor nodes
            in the query structure template.
          additionalProperties:
            type: integer
          example:
            a: 11754
        variables:
          type: object
          description: |
            Optional map of variable node id -> entity id. Keys must be a subset
            of the variable nodes in the query structure template. Omitted variable
            nodes are sampled automatically from the graph given the anchors and relations.
          additionalProperties:
            type: integer
          example:
            v1: 5432
        relations:
          type: object
          description: |
            Map of edge id -> relation id. Keys must match all edges in the
            query structure template.
          additionalProperties:
            type: integer
          example:
            r1: 3
        top_k:
          type: integer
          description: Number of top predictions to return
          default: 10
          minimum: 1
          maximum: 10

    # Prediction result: echoes the request selectors and adds a readable query
    # rendering, the predictions and a timing breakdown.
    CoinsPredictResponse:
      type: object
      required:
        - dataset_id
        - algorithm
        - query_structure
        - query_description
        - predictions
        - timing
      properties:
        dataset_id:
          $ref: '#/components/schemas/CoinsDatasetIdEnum'
        algorithm:
          $ref: '#/components/schemas/CoinsAlgorithmEnum'
        query_structure:
          $ref: '#/components/schemas/CoinsQueryStructureEnum'
        query_description:
          type: string
          example: "dog's_breakfast.n.01 --[_hypernym]--> ?"
          description: Human-readable rendering of the query with entity/relation names
        predictions:
          type: array
          items:
            $ref: '#/components/schemas/CoinsPrediction'
        timing:
          $ref: '#/components/schemas/CoinsTiming'

    # One predicted entity with its global and intra-community ranks, its score
    # and a flag telling whether it is a true answer in the KG.
    CoinsPrediction:
      type: object
      required:
        - rank
        - intra_community_rank
        - entity_id
        - entity_name
        - score
        - is_valid_answer
      properties:
        rank:
          type: integer
          minimum: 1
          example: 14
          description: |
            COINs aggregate rank from rank_samples: rank = c_err + intra_community_rank.
            Sum of all entity counts in communities with better step-1 score, plus the
            rank of this prediction within its community.
        intra_community_rank:
          type: integer
          minimum: 1
          example: 1
          description: Rank of this prediction within the hit community (intra_community_rank in rank_samples)
        entity_id:
          type: integer
          example: 12045
        entity_name:
          type: string
          example: dish.n.02
        score:
          type: number
          format: float
          example: 0.923
        is_valid_answer:
          type: boolean
          example: true
          description: |
            True if this entity actually satisfies the query in the KG (i.e. is a member of
            `get_all_answers`). Step 2 scores every entity in the hit community, so any
            prediction may be a non-answer the model surfaced — the flag lets the frontend
            mark the genuine hits.

    # Timing breakdown of one COINs inference. Durations are float milliseconds;
    # each duration is paired with a display label, and every field is mandatory.
    CoinsTiming:
      type: object
      required:
        - step1_ms
        - step1_label
        - step2_ms
        - step2_label
        - total_ms
        - rank_c
        - baseline_estimate_ms
        - baseline_label
        - speedup
      properties:
        step1_ms:
          type: number
          format: float
          example: 12
          description: 'Time for Step 1: community detection + localized embedding'
        step1_label:
          type: string
          example: Community detection + localized embedding
        step2_ms:
          type: number
          format: float
          example: 33
          description: 'Time for Step 2: community search + link prediction (covers all communities tried)'
        step2_label:
          type: string
          example: Link prediction
        total_ms:
          type: number
          format: float
          example: 45
          description: Total COINs inference time (step1 + step2)
        rank_c:
          type: integer
          minimum: 0
          example: 1
          description: |
            Step-1 rank of the community where predictions were found (rank_c in rank_samples).
            1 = top-ranked community was a hit. >1 = had to search deeper.
            0 = no predictions found in any community.
        baseline_estimate_ms:
          type: number
          format: float
          example: 320
          description: Simulated estimate of full-graph inference without COINs
        baseline_label:
          type: string
          example: Estimated baseline (without COINs)
        speedup:
          type: number
          format: float
          example: 7.1
          description: 'Proposition 3.1 speedup: V / (K + c_err + |C_{rank_c}|)'

    # -- Graph Generation: Discovery --
    # Envelope around the list of graph-generation datasets.
    GraphGenDatasetsResponse:
      type: object
      required:
        - datasets
      properties:
        datasets:
          type: array
          items:
            $ref: '#/components/schemas/GraphGenDataset'

    # Descriptor of a graph-generation dataset: its node/edge vocabularies, the
    # maximum graph size and the model types available for it.
    GraphGenDataset:
      type: object
      required: [id, name, type, description, node_types, edge_types, max_nodes, available_model_types]
      properties:
        id:
          $ref: "#/components/schemas/GraphGenDatasetIdEnum"
        name:
          type: string
          example: QM9
        type:
          type: string
          enum: [molecular, synthetic]
          example: molecular
        description:
          type: string
          example: Small organic molecules with up to 9 heavy atoms (C, N, O, F)
        node_types:
          type: array
          items:
            type: string
          # Quoted on purpose: a bare `N` is a boolean (false) under YAML 1.1
          # resolution rules, which would corrupt this string-array example in
          # loaders that implement them.
          example: ["C", "N", "O", "F"]
        edge_types:
          type: array
          items:
            type: string
          example: [none, single, double, triple, aromatic]
        max_nodes:
          type: integer
          example: 9
        available_model_types:
          type: array
          items:
            $ref: "#/components/schemas/GraphGenModelTypeEnum"
          example: [discrete, continuous]

    # Envelope around the list of sampling-mode descriptors.
    GraphGenSamplingModesResponse:
      type: object
      required:
        - sampling_modes
      properties:
        sampling_modes:
          type: array
          items:
            $ref: '#/components/schemas/GraphGenSamplingMode'

    # Descriptor of one sampling mode. `parameters` is optional (not in `required`).
    GraphGenSamplingMode:
      type: object
      required:
        - id
        - name
        - description
      properties:
        id:
          $ref: '#/components/schemas/SamplingModeEnum'
        name:
          type: string
          example: Standard Denoising
        description:
          type: string
          example: 'Iterative denoising from T to 0. Full quality, slower.'
        parameters:
          type: array
          items:
            $ref: '#/components/schemas/SamplingParameter'
          description: |
            Configurable parameters for this sampling mode.
            Standard mode: T (diffusion steps).
            MultiProx mode: T (diffusion steps), m (parallel samples), t (noise level), t_prime (noise level).

    # Metadata for one tunable sampling parameter: its name, value kind, default
    # and allowed range. All six fields are mandatory.
    SamplingParameter:
      type: object
      required:
        - name
        - type
        - description
        - default
        - min
        - max
      properties:
        name:
          type: string
          example: m
        type:
          type: string
          enum:
            - integer
            - float
        description:
          type: string
          example: Number of parallel samples per multi-measurement step
        # Numeric bounds are typed `number` so they can describe both kinds above.
        default:
          type: number
        min:
          type: number
        max:
          type: number

    # -- Graph Generation: Generate --
    # Graph-generation request body. Only the dataset, model type and sampling
    # mode are mandatory; every other field is optional.
    GraphGenGenerateRequest:
      type: object
      required:
        - dataset_id
        - model_type
        - sampling_mode
      properties:
        dataset_id:
          $ref: '#/components/schemas/GraphGenDatasetIdEnum'
        model_type:
          $ref: '#/components/schemas/GraphGenModelTypeEnum'
        sampling_mode:
          $ref: '#/components/schemas/SamplingModeEnum'
        num_nodes:
          type: integer
          nullable: true
          example: null
          description: 'Number of nodes. null = sampled from learned distribution.'
        diffusion_steps:
          type: integer
          description: Number of diffusion steps T (both standard and multiprox modes)
          default: 500
          minimum: 50
          maximum: 1000
        chain_frames:
          type: integer
          description: Number of denoising snapshots in the GIF (standard mode only)
          default: 20
          minimum: 10
          maximum: 30
        multiprox_params:
          $ref: '#/components/schemas/MultiProxParams'

    # MultiProx sampling parameters. The object itself is nullable; when it is
    # given, all three fields are mandatory. The t_prime <= t relation cannot be
    # expressed in the schema and is documented in the description only.
    MultiProxParams:
      type: object
      nullable: true
      required:
        - m
        - t
        - t_prime
      description: |
        Required when sampling_mode is multiprox, null otherwise.
        Constraint: t_prime <= t (the second noise level must not exceed the first).
        Returns 400 INVALID_REQUEST if violated.
      properties:
        m:
          type: integer
          example: 10
          minimum: 2
          maximum: 100
          description: Number of parallel samples per multi-measurement step
        t:
          type: number
          format: float
          example: 0.5
          minimum: 0.0
          maximum: 1.0
          description: First noise level (normalized, 0-1)
        t_prime:
          type: number
          format: float
          example: 0.1
          minimum: 0.0
          maximum: 1.0
          description: 'Second noise level (normalized, 0-1). Must satisfy t_prime <= t.'

    # Result of a standard-mode generation run: the final image, the animated
    # denoising chain and the inference time.
    #
    # `image` and `chain_gif` hold complete data URIs ("data:<mime>;base64,<payload>").
    # They are therefore plain strings: `format: byte` would declare the whole
    # value to be base64, which the "data:...;base64," prefix is not, and would
    # make validators and generated clients try to base64-decode the URI.
    GraphGenStandardResponse:
      type: object
      required: [dataset_id, model_type, sampling_mode, image, chain_gif, inference_time_ms]
      properties:
        dataset_id:
          $ref: "#/components/schemas/GraphGenDatasetIdEnum"
        model_type:
          $ref: "#/components/schemas/GraphGenModelTypeEnum"
        sampling_mode:
          type: string
          # Single-value enum: this response shape is only used for standard mode.
          enum: [standard]
        image:
          type: string
          description: Final generated graph as base64 PNG (data URI)
          example: "data:image/png;base64,iVBORw0KGgo..."
        chain_gif:
          type: string
          description: Animated GIF of the denoising process (data URI), played once by frontend
          example: "data:image/gif;base64,R0lGODlh..."
        inference_time_ms:
          type: number
          format: float
          example: 3200

    # Continue-request body: carries only the opaque state blob from the previous step.
    GraphGenContinueRequest:
      type: object
      required:
        - state
      properties:
        state:
          format: byte
          type: string
          description: |
            Opaque base64 blob containing the serialized diffusion state from
            the previous step's response. Must be passed back unchanged.

    # Result of one MultiProx step: the current image plus the opaque state
    # needed to request the next step.
    #
    # `image` is a complete data URI, so it is a plain string (no `format: byte`:
    # the "data:...;base64," prefix is not base64). `state` is genuinely
    # base64 and keeps `format: byte`; its example is valid base64 so that it
    # passes example validation and can be used by mock servers.
    GraphGenMultiProxResponse:
      type: object
      required: [step, image, state, inference_time_ms]
      properties:
        step:
          type: integer
          minimum: 0
          description: Current Gibbs iteration number (0-indexed)
          example: 0
        image:
          type: string
          description: Current graph state as base64 PNG (data URI)
          example: "data:image/png;base64,iVBORw0KGgo..."
        state:
          type: string
          format: byte
          description: |
            Opaque base64 blob containing the serialized diffusion state.
            Pass this back to the continue endpoint for the next step.
          # Placeholder only: decodes to the text "diffusion-state-blob".
          example: "ZGlmZnVzaW9uLXN0YXRlLWJsb2I="
        inference_time_ms:
          type: number
          format: float
          example: 800

    # -- Graph Generation SSE stream ----------------------------------
    # Documentation-only schema: the stream is modelled as a plain string and the
    # per-event payload formats are explained in the description.
    GraphGenSseStream:
      description: |
        SSE text stream. Each event is `event: <name>\ndata: <payload>\n\n`.

        * `event: progress` -- payload is `GraphGenProgressEvent` JSON.
        * `event: preview` -- payload is a raw `data:image/png;base64,...` data URI
          (intermediate graph snapshot; not JSON).
        * `event: result` -- payload is a `GraphGenStandardResponse` (standard mode)
          or `GraphGenMultiProxResponse` (multiprox mode / continue) JSON.
        * `event: error` -- payload is an error object with `code` and `message`.
      type: string

    # JSON payload of a `progress` event. `step` and `total` are optional.
    GraphGenProgressEvent:
      type: object
      required:
        - type
        - stage
      properties:
        type:
          type: string
          enum:
            - progress
        stage:
          type: string
          description: Current phase (e.g. "denoise", "noise", "refine")
        step:
          description: Current step within the stage
          type: integer
        total:
          description: Total steps in the stage
          type: integer

    # -- KG Anomaly Correction: Discovery --
    # Envelope around the list of KG anomaly-correction datasets.
    KgAnomalyDatasetsResponse:
      type: object
      required:
        - datasets
      properties:
        datasets:
          type: array
          items:
            $ref: '#/components/schemas/KgAnomalyDataset'

    # Descriptor of one KG anomaly-correction dataset.
    KgAnomalyDataset:
      type: object
      required:
        - id
        - name
        - description
      properties:
        id:
          $ref: '#/components/schemas/KgAnomalyDatasetIdEnum'
        name:
          example: WN18RR
          type: string
        description:
          example: Diffusion model trained on WordNet subgraphs
          type: string

    # Sample-subgraphs payload: the dataset identifier plus the sampled subgraphs.
    KgAnomalySampleSubgraphsResponse:
      type: object
      required:
        - dataset_id
        - subgraphs
      properties:
        dataset_id:
          $ref: '#/components/schemas/KgAnomalyDatasetIdEnum'
        subgraphs:
          type: array
          items:
            $ref: '#/components/schemas/KgAnomalySampleSubgraph'

    # One sampled subgraph: an identifier, node and edge counts, and the node and
    # edge lists (edges carry entity/relation names).
    KgAnomalySampleSubgraph:
      type: object
      required:
        - id
        - num_nodes
        - num_edges
        - nodes
        - edges
      properties:
        id:
          example: sample_1
          type: string
        num_nodes:
          example: 6
          type: integer
        num_edges:
          example: 8
          type: integer
        nodes:
          type: array
          items:
            $ref: '#/components/schemas/SubgraphNode'
        edges:
          type: array
          items:
            $ref: '#/components/schemas/SubgraphEdgeWithNames'

    # Subgraph node: entity id and type id are mandatory, the entity name is optional.
    SubgraphNode:
      type: object
      required:
        - entity_id
        - type_id
      properties:
        entity_id:
          example: 11754
          type: integer
        entity_name:
          type: string
          example: "dog's_breakfast.n.01"
          description: Present in responses, not required in requests
        type_id:
          example: 3
          type: integer

    # Subgraph edge: the endpoints are positions in the accompanying `nodes`
    # array, not entity ids.
    SubgraphEdge:
      type: object
      required:
        - source_idx
        - target_idx
        - relation_id
      properties:
        source_idx:
          type: integer
          example: 0
          description: Index into the nodes array
        target_idx:
          type: integer
          example: 1
          description: Index into the nodes array
        relation_id:
          example: 3
          type: integer

    # SubgraphEdge extended (via allOf) with optional human-readable names for
    # both endpoints and the relation.
    SubgraphEdgeWithNames:
      allOf:
        - $ref: '#/components/schemas/SubgraphEdge'
        - type: object
          properties:
            entity_name_source:
              example: "dog's_breakfast.n.01"
              type: string
            entity_name_target:
              example: bulldog.v.01
              type: string
            relation_name:
              example: _hypernym
              type: string

    # -- KG Anomaly Correction: Correct --
    # Task selector for the correction endpoint; two allowed values.
    KgAnomalyTaskEnum:
      type: string
      enum:
        - generate
        - correct
      description: |
        "generate" = generate all edges from scratch (no masking).
        "correct" = keep fixed edges unchanged, only correct the masked edges.

    # KG anomaly-correction request body. Dataset, sampling mode and the input
    # subgraph are mandatory; the task, step counts and MultiProx parameters are
    # optional.
    KgAnomalyCorrectRequest:
      type: object
      required: [dataset_id, sampling_mode, subgraph]
      properties:
        dataset_id:
          $ref: "#/components/schemas/KgAnomalyDatasetIdEnum"
        sampling_mode:
          $ref: "#/components/schemas/SamplingModeEnum"
        task:
          # OpenAPI 3.0 ignores any keyword placed next to `$ref`, so the
          # reference is wrapped in `allOf` to make `default` and `description`
          # take effect for validators, documentation and code generators.
          allOf:
            - $ref: "#/components/schemas/KgAnomalyTaskEnum"
          default: correct
          description: |
            "generate" = ignore the input subgraph edges and generate a new subgraph from scratch.
            "correct" (default) = keep fixed edges unchanged, only correct the masked (anomalous) edges.
            The mask is computed automatically based on the subgraph structure.
        subgraph:
          $ref: "#/components/schemas/InputSubgraph"
        diffusion_steps:
          type: integer
          minimum: 50
          maximum: 1000
          default: 500
          description: Number of diffusion steps T (both standard and multiprox modes)
        chain_frames:
          type: integer
          minimum: 10
          maximum: 30
          default: 20
          description: Number of denoising snapshots in the GIF (standard mode only)
        multiprox_params:
          $ref: "#/components/schemas/MultiProxParams"

    # Request-side subgraph. Nodes are declared inline with ids only, and the
    # node list is capped at 20 items by `maxItems`.
    InputSubgraph:
      type: object
      description: 'Subgraph to correct. Maximum ~20 nodes.'
      required:
        - nodes
        - edges
      properties:
        nodes:
          type: array
          maxItems: 20
          items:
            type: object
            required:
              - entity_id
              - type_id
            properties:
              entity_id:
                type: integer
              type_id:
                type: integer
        edges:
          type: array
          items:
            $ref: '#/components/schemas/SubgraphEdge'

    # Result of a standard-mode correction run: before/after images, the
    # animated correction chain, the edge-level changes and the inference time.
    #
    # The three image fields hold complete data URIs ("data:<mime>;base64,<payload>").
    # They are therefore plain strings: `format: byte` would declare the whole
    # value to be base64, which the "data:...;base64," prefix is not, and would
    # make validators and generated clients try to base64-decode the URI.
    KgAnomalyStandardResponse:
      type: object
      required: [dataset_id, original_image, corrected_image, chain_gif, changes, inference_time_ms]
      properties:
        dataset_id:
          $ref: "#/components/schemas/KgAnomalyDatasetIdEnum"
        original_image:
          type: string
          description: Before-correction subgraph as base64 PNG (data URI)
          example: "data:image/png;base64,..."
        corrected_image:
          type: string
          description: After-correction subgraph as base64 PNG with color-coded edges (data URI)
          example: "data:image/png;base64,..."
        chain_gif:
          type: string
          description: Animated GIF of the correction diffusion process (data URI)
          example: "data:image/gif;base64,..."
        changes:
          $ref: "#/components/schemas/CorrectionChanges"
        inference_time_ms:
          type: number
          format: float
          example: 5100

    # Continue-request body: carries only the opaque state blob from the previous step.
    KgAnomalyContinueRequest:
      type: object
      required:
        - state
      properties:
        state:
          format: byte
          type: string
          description: |
            Opaque base64 blob containing the serialized diffusion state from
            the previous step's response. Must be passed back unchanged.

    # Result of one MultiProx correction step: the current image, the opaque
    # state for the next step and the edge-level changes.
    #
    # `image` is a complete data URI, so it is a plain string (no `format: byte`:
    # the "data:...;base64," prefix is not base64). `state` is genuinely
    # base64 and keeps `format: byte`; its example is valid base64 so that it
    # passes example validation and can be used by mock servers.
    KgAnomalyMultiProxResponse:
      type: object
      required: [step, image, state, changes, inference_time_ms]
      properties:
        step:
          type: integer
          minimum: 0
          example: 0
        image:
          type: string
          description: Current subgraph state as base64 PNG (data URI)
          example: "data:image/png;base64,..."
        state:
          type: string
          format: byte
          description: |
            Opaque base64 blob containing the serialized diffusion state.
            Pass this back to the continue endpoint for the next step.
          # Placeholder only: decodes to the text "diffusion-state-blob".
          example: "ZGlmZnVzaW9uLXN0YXRlLWJsb2I="
        changes:
          $ref: "#/components/schemas/CorrectionChanges"
        inference_time_ms:
          type: number
          format: float
          example: 900

    # Edge-level change list together with its aggregate summary.
    CorrectionChanges:
      type: object
      required:
        - edges
        - summary
      properties:
        edges:
          type: array
          items:
            $ref: '#/components/schemas/CorrectedEdge'
        summary:
          $ref: '#/components/schemas/CorrectionSummary'

    # One edge after correction with its change status. `original_relation_id`
    # is optional (not in `required`).
    CorrectedEdge:
      type: object
      required:
        - source_idx
        - target_idx
        - relation_id
        - relation_name
        - status
      properties:
        source_idx:
          example: 0
          type: integer
        target_idx:
          example: 1
          type: integer
        relation_id:
          example: 3
          type: integer
        relation_name:
          example: _hypernym
          type: string
        status:
          $ref: '#/components/schemas/EdgeChangeStatusEnum'
        original_relation_id:
          type: integer
          example: 99
          description: Only present when status is "modified"

    # Four mandatory integer counters, one per change category.
    # NOTE(review): presumably counts of edges per status -- confirm against EdgeChangeStatusEnum.
    CorrectionSummary:
      type: object
      required:
        - unchanged
        - modified
        - added
        - removed
      properties:
        unchanged:
          example: 2
          type: integer
        modified:
          example: 1
          type: integer
        added:
          example: 0
          type: integer
        removed:
          example: 0
          type: integer

    # -- KG Anomaly SSE stream ----------------------------------------
    # Documentation-only schema: the stream is modelled as a plain string and the
    # per-event payload formats are explained in the description.
    KgAnomalySseStream:
      description: |
        SSE text stream. Each event is `event: <name>\ndata: <payload>\n\n`.

        * `event: progress` -- payload is `KgAnomalyProgressEvent` JSON.
        * `event: preview` -- payload is a raw `data:image/png;base64,...` data URI
          (intermediate subgraph snapshot; not JSON).
        * `event: result` -- payload is a `KgAnomalyStandardResponse` (standard mode)
          or `KgAnomalyMultiProxResponse` (multiprox mode / continue) JSON.
        * `event: error` -- payload is an error object with `code` and `message`.
      type: string

    # JSON payload of a `progress` event on the KG anomaly stream.
    # Only `type` and `stage` are mandatory; the remaining fields are optional,
    # and the two kg_log_likelihood* fields may additionally be null.
    KgAnomalyProgressEvent:
      type: object
      required: [type, stage]
      properties:
        type:
          type: string
          # Single-value enum: acts as a fixed discriminator for this event shape.
          enum: [progress]
        stage:
          type: string
          description: Current phase (e.g. "denoise", "noise", "refine")
        step:
          type: integer
          description: Current step within the stage
        total:
          type: integer
          description: Total steps in the stage
        kg_log_likelihood:
          type: number
          nullable: true
          # Folded scalar (`>`): the lines below are joined into one paragraph.
          description: >
            Mean log-sigmoid score from the frozen KG embedder + link ranker
            applied to the edges currently present in the argmax reconstruction.
            Higher = cleaner. Present only on frame-boundary events.
        kg_log_likelihood_step:
          type: integer
          nullable: true
          description: Step index that `kg_log_likelihood` corresponds to.