Spaces:

Bani57
/

website

Sleeping

Andrej Janchevski committed on Apr 14

Commit

aaf56bb

1 Parent(s): acde928

feat(kg-anomaly): add correct/continue endpoints with SSE streaming

Implements the two remaining inference endpoints for the KG anomaly
feature, following the MultiProxAn graph-generation pattern:

- POST /kg-anomaly/correct: standard denoising (correct/generate tasks)
or MultiProx Gibbs init; returns SSE stream with progress, preview,
and terminal result events (before/after images, chain GIF, diff).
- POST /kg-anomaly/continue: advances a MultiProx session one step.
- Adds kg_anomaly_inference.py with tensor building, change detection,
directed subgraph rendering (PIL + networkx), and an apply_edge_noise
helper that task-aware forward-diffuses edges for demo input.
- Extends GET /kg-anomaly/datasets/{id}/sample-subgraphs with
noise_level/task/seed query params so callers can fetch pre-noised
subgraphs ready for correction.
- Registry: adds DiscreteDenoisingDiffusionKG loader that reconstructs
dataset_infos from checkpoint state_dict shapes + COINs experiment.
- Adds graph_generation/src to sys.path for bare imports inside the
research module.
- Updates OpenAPI spec, Postman collection (pre-noised example bodies,
noised-subgraph GET variants, auto-chaining for multiprox), and the
backend README endpoint table.

Files changed (8) hide show

docs/api.yaml +175 -54
docs/postman/collection.json +64 -8
src/backend/README.md +5 -5
src/backend/api/services/kg_anomaly_inference.py +708 -0
src/backend/api/services/registry.py +239 -2
src/backend/api/urls.py +8 -1
src/backend/api/views/kg_anomaly.py +134 -2
src/backend/research_api/settings.py +2 -1

docs/api.yaml CHANGED Viewed

@@ -62,6 +62,40 @@ paths:
               schema:
                 $ref: "#/components/schemas/MethodsResponse"
   # -- COINs -----------------------------------------------------------
   /coins/datasets:
     get:
@@ -279,15 +313,19 @@ paths:
     post:
       operationId: graphGenGenerate
       tags: [graph-generation]
-      summary: Generate a graph
       description: |
-        **Standard mode**: runs full diffusion (T->0), returns animated GIF of
-        the denoising chain + final PNG. Frontend plays the GIF once.
-        **MultiProx mode**: starts a session, runs the first Gibbs iteration,
-        returns step 0 image + an opaque `state` blob. Use the
-        `/graph-generation/continue` endpoint with that state to advance
-        one step at a time.
       requestBody:
         required: true
         content:
@@ -318,30 +356,11 @@ paths:
                     t_prime: 0.1
       responses:
         "200":
-          description: Generated graph (standard) or session step 0 (multiprox)
           content:
-            application/json:
               schema:
-                oneOf:
-                  - $ref: "#/components/schemas/GraphGenStandardResponse"
-                  - $ref: "#/components/schemas/GraphGenMultiProxResponse"
-              examples:
-                standard:
-                  summary: Standard generation result
-                  value:
-                    dataset_id: qm9
-                    model_type: discrete
-                    sampling_mode: standard
-                    image: "data:image/png;base64,..."
-                    chain_gif: "data:image/gif;base64,..."
-                    inference_time_ms: 3200
-                multiprox:
-                  summary: MultiProx session started
-                  value:
-                    state: "base64-encoded-diffusion-state..."
-                    step: 0
-                    image: "data:image/png;base64,..."
-                    inference_time_ms: 800
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
@@ -353,11 +372,14 @@ paths:
     post:
       operationId: graphGenContinue
       tags: [graph-generation]
-      summary: Advance MultiProx generation by one step
       description: |
-        Advances the MultiProx multi-measurement chain by one Gibbs iteration.
-        The client must send back the opaque `state` from the previous step's
-        response. This keeps the API fully stateless - no server-side sessions.
       requestBody:
         required: true
         content:
@@ -366,11 +388,11 @@ paths:
               $ref: "#/components/schemas/GraphGenContinueRequest"
       responses:
         "200":
-          description: Step result
           content:
-            application/json:
               schema:
-                $ref: "#/components/schemas/GraphGenMultiProxResponse"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
@@ -396,7 +418,13 @@ paths:
       operationId: getKgAnomalySampleSubgraphs
       tags: [kg-anomaly]
       summary: Get example subgraphs for correction
-      description: Returns pre-computed example subgraphs from the test set.
       parameters:
         - $ref: "#/components/parameters/KgAnomalyDatasetId"
         - name: count
@@ -407,6 +435,32 @@ paths:
             maximum: 10
             default: 5
           description: Number of sample subgraphs to return
       responses:
         "200":
           description: Sample subgraphs
@@ -421,12 +475,19 @@ paths:
     post:
       operationId: kgAnomalyCorrect
       tags: [kg-anomaly]
-      summary: Correct a KG subgraph
       description: |
-        **Standard mode**: runs full diffusion correction, returns animated GIF
-        of the process + before/after images + structured diff.
-        **MultiProx mode** (future): starts a session for step-by-step correction.
       requestBody:
         required: true
         content:
@@ -476,13 +537,11 @@ paths:
                     t_prime: 0.1
       responses:
         "200":
-          description: Correction result
           content:
-            application/json:
               schema:
-                oneOf:
-                  - $ref: "#/components/schemas/KgAnomalyStandardResponse"
-                  - $ref: "#/components/schemas/KgAnomalyMultiProxResponse"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "404":
@@ -498,11 +557,13 @@ paths:
     post:
       operationId: kgAnomalyContinue
       tags: [kg-anomaly]
-      summary: Advance MultiProx correction by one step (future)
       description: |
-        Advances the MultiProx correction chain by one Gibbs iteration.
-        The client must send back the opaque `state` from the previous step's
-        response. Returns the updated subgraph image and current diff.
       requestBody:
         required: true
         content:
@@ -511,11 +572,11 @@ paths:
               $ref: "#/components/schemas/KgAnomalyContinueRequest"
       responses:
         "200":
-          description: Step result
           content:
-            application/json:
               schema:
-                $ref: "#/components/schemas/KgAnomalyMultiProxResponse"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
@@ -1291,6 +1352,36 @@ components:
           format: float
           example: 800
     # -- KG Anomaly Correction: Discovery --
     KgAnomalyDatasetsResponse:
       type: object
@@ -1411,7 +1502,7 @@ components:
           $ref: "#/components/schemas/SamplingModeEnum"
         task:
           $ref: "#/components/schemas/KgAnomalyTaskEnum"
-          default: inpaint
           description: |
             "generate" = ignore the input subgraph edges and generate a new subgraph from scratch.
             "correct" (default) = keep fixed edges unchanged, only correct the masked (anomalous) edges.
@@ -1570,3 +1661,33 @@ components:
         removed:
           type: integer
           example: 0

               schema:
                 $ref: "#/components/schemas/MethodsResponse"
+  /debug/force-unlock:
+    post:
+      operationId: forceUnlockInferenceLock
+      tags: [health]
+      summary: Release a stuck inference lock (debug only)
+      description: |
+        Forcibly releases the global inference lock. Only available when
+        the server is running with `DJANGO_DEBUG=True`; returns `403` in
+        production. Use when a crashed request left the lock held and
+        subsequent requests are returning `429 INFERENCE_BUSY`.
+      responses:
+        "200":
+          description: Lock release result
+          content:
+            application/json:
+              schema:
+                type: object
+                required: [released]
+                properties:
+                  released:
+                    type: boolean
+                    description: True if a held lock was released; false if the lock was already free.
+                    example: true
+        "403":
+          description: Not available outside debug mode
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: only available in debug mode
   # -- COINs -----------------------------------------------------------
   /coins/datasets:
     get:
     post:
       operationId: graphGenGenerate
       tags: [graph-generation]
+      summary: Generate a graph (SSE streaming)
       description: |
+        Server-Sent Events stream (`text/event-stream`). Emits `progress`
+        events during diffusion, optional `preview` events with intermediate
+        PNGs, and a terminal `result` event whose `data` payload is the JSON
+        described below.
+        **Standard mode**: runs full diffusion (T->0); terminal `result`
+        payload conforms to `GraphGenStandardResponse` (animated GIF + final PNG).
+        **MultiProx mode**: runs the first Gibbs iteration; terminal `result`
+        payload conforms to `GraphGenMultiProxResponse` and includes an opaque
+        `state` blob to be passed to `/graph-generation/continue`.
       requestBody:
         required: true
         content:
                     t_prime: 0.1
       responses:
         "200":
+          description: SSE stream of progress/preview events terminated by a result event
           content:
+            text/event-stream:
               schema:
+                $ref: "#/components/schemas/GraphGenSseStream"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
     post:
       operationId: graphGenContinue
       tags: [graph-generation]
+      summary: Advance MultiProx generation by one step (SSE streaming)
       description: |
+        SSE stream (`text/event-stream`). Advances the MultiProx
+        multi-measurement chain by one Gibbs iteration. The client must send
+        back the opaque `state` from the previous step's `result` event.
+        Emits `progress` and `preview` events, then a terminal `result` event
+        with the `GraphGenMultiProxResponse` payload (including the updated
+        `state`). The API remains fully stateless -- no server-side sessions.
       requestBody:
         required: true
         content:
               $ref: "#/components/schemas/GraphGenContinueRequest"
       responses:
         "200":
+          description: SSE stream of progress/preview events terminated by a result event
           content:
+            text/event-stream:
               schema:
+                $ref: "#/components/schemas/GraphGenSseStream"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
       operationId: getKgAnomalySampleSubgraphs
       tags: [kg-anomaly]
       summary: Get example subgraphs for correction
+      description: |
+        Returns pre-computed example subgraphs from the test set. When
+        `noise_level` is supplied, the model's forward diffusion is applied
+        to each subgraph's edges so the caller receives a corrupted input
+        ready for `/kg-anomaly/correct`. For `task=correct` only the edges
+        inside the inpaint mask (second half of nodes) are noised; for
+        `task=generate` every edge is noised.
       parameters:
         - $ref: "#/components/parameters/KgAnomalyDatasetId"
         - name: count
             maximum: 10
             default: 5
           description: Number of sample subgraphs to return
+        - name: noise_level
+          in: query
+          required: false
+          schema:
+            type: number
+            minimum: 0.0
+            exclusiveMinimum: true
+            maximum: 1.0
+          description: |
+            Fraction of the full diffusion horizon T at which to sample
+            noised edges, in the range (0, 1] (e.g. 0.4 for moderate
+            corruption). Omit this parameter to receive the clean subgraphs.
+        - name: task
+          in: query
+          required: false
+          schema:
+            type: string
+            enum: [correct, generate]
+            default: correct
+          description: Task the noise should align with (`correct` noises only the edges inside the inpaint mask; `generate` noises every edge). Ignored if `noise_level` is not set.
+        - name: seed
+          in: query
+          required: false
+          schema:
+            type: integer
+          description: Optional RNG seed for reproducible noise.
       responses:
         "200":
           description: Sample subgraphs
     post:
       operationId: kgAnomalyCorrect
       tags: [kg-anomaly]
+      summary: Correct a KG subgraph (SSE streaming)
       description: |
+        Server-Sent Events stream (`text/event-stream`). Emits `progress`
+        events during diffusion, optional `preview` events with intermediate
+        PNGs, and a terminal `result` event whose `data` payload is the JSON
+        described below.
+        **Standard mode**: runs full diffusion correction; terminal `result`
+        payload conforms to `KgAnomalyStandardResponse`.
+        **MultiProx mode**: runs the first Gibbs iteration; terminal `result`
+        payload conforms to `KgAnomalyMultiProxResponse` and includes an
+        opaque `state` blob to be passed to `/kg-anomaly/continue`.
       requestBody:
         required: true
         content:
                     t_prime: 0.1
       responses:
         "200":
+          description: SSE stream of progress/preview events terminated by a result event
           content:
+            text/event-stream:
               schema:
+                $ref: "#/components/schemas/KgAnomalySseStream"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "404":
     post:
       operationId: kgAnomalyContinue
       tags: [kg-anomaly]
+      summary: Advance MultiProx correction by one step (SSE streaming)
       description: |
+        SSE stream (`text/event-stream`). Advances the MultiProx correction
+        chain by one Gibbs iteration. The client must send back the opaque
+        `state` from the previous step's `result` event. Emits `progress`
+        and `preview` events, then a terminal `result` event with the
+        `KgAnomalyMultiProxResponse` payload (including the updated `state`).
       requestBody:
         required: true
         content:
               $ref: "#/components/schemas/KgAnomalyContinueRequest"
       responses:
         "200":
+          description: SSE stream of progress/preview events terminated by a result event
           content:
+            text/event-stream:
               schema:
+                $ref: "#/components/schemas/KgAnomalySseStream"
         "400":
           $ref: "#/components/responses/InvalidRequest"
         "429":
           format: float
           example: 800
+    # -- Graph Generation SSE stream ----------------------------------
+    GraphGenSseStream:
+      type: string
+      description: |
+        SSE text stream. Each event is `event: <name>\ndata: <payload>\n\n`.
+        * `event: progress` -- payload is `GraphGenProgressEvent` JSON.
+        * `event: preview` -- payload is a raw `data:image/png;base64,...` data URI
+          (intermediate graph snapshot; not JSON).
+        * `event: result` -- payload is a `GraphGenStandardResponse` (standard mode)
+          or `GraphGenMultiProxResponse` (multiprox mode / continue) JSON.
+        * `event: error` -- payload is an error object with `code` and `message`.
+    GraphGenProgressEvent:
+      type: object
+      required: [type, stage]
+      properties:
+        type:
+          type: string
+          enum: [progress]
+        stage:
+          type: string
+          description: Current phase (e.g. "denoise", "noise", "refine")
+        step:
+          type: integer
+          description: Current step within the stage
+        total:
+          type: integer
+          description: Total steps in the stage
     # -- KG Anomaly Correction: Discovery --
     KgAnomalyDatasetsResponse:
       type: object
           $ref: "#/components/schemas/SamplingModeEnum"
         task:
           $ref: "#/components/schemas/KgAnomalyTaskEnum"
+          default: correct
           description: |
             "generate" = ignore the input subgraph edges and generate a new subgraph from scratch.
             "correct" (default) = keep fixed edges unchanged, only correct the masked (anomalous) edges.
         removed:
           type: integer
           example: 0
+    # -- KG Anomaly SSE stream ----------------------------------------
+    KgAnomalySseStream:
+      type: string
+      description: |
+        SSE text stream. Each event is `event: <name>\ndata: <payload>\n\n`.
+        * `event: progress` -- payload is `KgAnomalyProgressEvent` JSON.
+        * `event: preview` -- payload is a raw `data:image/png;base64,...` data URI
+          (intermediate subgraph snapshot; not JSON).
+        * `event: result` -- payload is a `KgAnomalyStandardResponse` (standard mode)
+          or `KgAnomalyMultiProxResponse` (multiprox mode / continue) JSON.
+        * `event: error` -- payload is an error object with `code` and `message`.
+    KgAnomalyProgressEvent:
+      type: object
+      required: [type, stage]
+      properties:
+        type:
+          type: string
+          enum: [progress]
+        stage:
+          type: string
+          description: Current phase (e.g. "denoise", "noise", "refine")
+        step:
+          type: integer
+          description: Current step within the stage
+        total:
+          type: integer
+          description: Total steps in the stage

docs/postman/collection.json CHANGED Viewed

@@ -612,7 +612,45 @@
                 { "key": "count", "value": "3" }
               ]
             },
-            "description": "Pre-computed example subgraphs for correction."
           }
         }
       ]
@@ -629,7 +667,7 @@
             ],
             "body": {
               "mode": "raw",
-              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"standard\",\n  \"task\": \"correct\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 11754, \"type_id\": 3},\n      {\"entity_id\": 5142, \"type_id\": 3},\n      {\"entity_id\": 8142, \"type_id\": 3}\n    ],\n    \"edges\": [\n      {\"source_idx\": 0, \"target_idx\": 1, \"relation_id\": 3},\n      {\"source_idx\": 1, \"target_idx\": 2, \"relation_id\": 1}\n    ]\n  },\n  \"diffusion_steps\": 500,\n  \"chain_frames\": 20\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
@@ -648,7 +686,7 @@
             ],
             "body": {
               "mode": "raw",
-              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"standard\",\n  \"task\": \"generate\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 11754, \"type_id\": 3},\n      {\"entity_id\": 5142, \"type_id\": 3}\n    ],\n    \"edges\": [\n      {\"source_idx\": 0, \"target_idx\": 1, \"relation_id\": 3}\n    ]\n  },\n  \"diffusion_steps\": 500,\n  \"chain_frames\": 20\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
@@ -659,7 +697,16 @@
           }
         },
         {
-          "name": "POST /kg-anomaly/correct (multiprox, future)",
           "request": {
             "method": "POST",
             "header": [
@@ -667,18 +714,27 @@
             ],
             "body": {
               "mode": "raw",
-              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"multiprox\",\n  \"task\": \"correct\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 11754, \"type_id\": 3},\n      {\"entity_id\": 5142, \"type_id\": 3}\n    ],\n    \"edges\": [\n      {\"source_idx\": 0, \"target_idx\": 1, \"relation_id\": 3}\n    ]\n  },\n  \"diffusion_steps\": 500,\n  \"multiprox_params\": {\n    \"m\": 10,\n    \"t\": 0.5,\n    \"t_prime\": 0.1\n  }\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
               "host": ["{{base_url}}"],
               "path": ["kg-anomaly", "correct"]
             },
-            "description": "MultiProx correction. Returns step 0 + state."
           }
         },
         {
           "name": "POST /kg-anomaly/continue",
           "request": {
             "method": "POST",
             "header": [
@@ -686,14 +742,14 @@
             ],
             "body": {
               "mode": "raw",
-              "raw": "{\n  \"state\": \"<paste state from previous response>\"\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/continue",
               "host": ["{{base_url}}"],
               "path": ["kg-anomaly", "continue"]
             },
-            "description": "Advance MultiProx correction by one step."
           }
         }
       ]

                 { "key": "count", "value": "3" }
               ]
             },
+            "description": "Pre-computed example subgraphs for correction (clean)."
+          }
+        },
+        {
+          "name": "GET /kg-anomaly/datasets/{id}/sample-subgraphs (noised, correct)",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": {
+              "raw": "{{base_url}}/kg-anomaly/datasets/wordnet/sample-subgraphs?count=3&noise_level=0.4&task=correct&seed=42",
+              "host": ["{{base_url}}"],
+              "path": ["kg-anomaly", "datasets", "wordnet", "sample-subgraphs"],
+              "query": [
+                { "key": "count", "value": "3" },
+                { "key": "noise_level", "value": "0.4" },
+                { "key": "task", "value": "correct" },
+                { "key": "seed", "value": "42" }
+              ]
+            },
+            "description": "Pre-noised example subgraphs for the 'correct' task (only inpaint-mask region is corrupted)."
+          }
+        },
+        {
+          "name": "GET /kg-anomaly/datasets/{id}/sample-subgraphs (noised, generate)",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": {
+              "raw": "{{base_url}}/kg-anomaly/datasets/wordnet/sample-subgraphs?count=3&noise_level=0.4&task=generate&seed=43",
+              "host": ["{{base_url}}"],
+              "path": ["kg-anomaly", "datasets", "wordnet", "sample-subgraphs"],
+              "query": [
+                { "key": "count", "value": "3" },
+                { "key": "noise_level", "value": "0.4" },
+                { "key": "task", "value": "generate" },
+                { "key": "seed", "value": "43" }
+              ]
+            },
+            "description": "Pre-noised example subgraphs for the 'generate' task (all edges corrupted)."
           }
         }
       ]
             ],
             "body": {
               "mode": "raw",
+              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"standard\",\n  \"task\": \"correct\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 28155, \"type_id\": 1},\n      {\"entity_id\": 29348, \"type_id\": 4},\n      {\"entity_id\": 29358, \"type_id\": 1},\n      {\"entity_id\": 36247, \"type_id\": 1},\n      {\"entity_id\": 36248, \"type_id\": 4},\n      {\"entity_id\": 36855, \"type_id\": 1},\n      {\"entity_id\": 36858, \"type_id\": 4},\n      {\"entity_id\": 36860, \"type_id\": 4},\n      {\"entity_id\": 36881, \"type_id\": 1},\n      {\"entity_id\": 39993, \"type_id\": 1}\n    ],\n    \"edges\": [\n      {\"source_idx\": 1, \"target_idx\": 2, \"relation_id\": 1},\n      {\"source_idx\": 1, \"target_idx\": 3, \"relation_id\": 1},\n      {\"source_idx\": 2, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 2, \"target_idx\": 4, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 4, \"relation_id\": 1},\n      {\"source_idx\": 4, \"target_idx\": 2, \"relation_id\": 1},\n      {\"source_idx\": 4, \"target_idx\": 3, \"relation_id\": 1},\n      {\"source_idx\": 5, \"target_idx\": 6, \"relation_id\": 2},\n      {\"source_idx\": 5, \"target_idx\": 7, \"relation_id\": 1},\n      {\"source_idx\": 5, \"target_idx\": 8, \"relation_id\": 5},\n      {\"source_idx\": 5, \"target_idx\": 9, \"relation_id\": 4},\n      {\"source_idx\": 6, \"target_idx\": 4, \"relation_id\": 3},\n      {\"source_idx\": 6, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 6, \"target_idx\": 8, \"relation_id\": 1},\n      {\"source_idx\": 7, \"target_idx\": 5, \"relation_id\": 10},\n      {\"source_idx\": 7, \"target_idx\": 6, \"relation_id\": 10},\n      {\"source_idx\": 8, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 6, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 7, \"relation_id\": 1},\n      
{\"source_idx\": 8, \"target_idx\": 9, \"relation_id\": 1},\n      {\"source_idx\": 9, \"target_idx\": 0, \"relation_id\": 3},\n      {\"source_idx\": 9, \"target_idx\": 6, \"relation_id\": 10},\n      {\"source_idx\": 9, \"target_idx\": 7, \"relation_id\": 3},\n      {\"source_idx\": 9, \"target_idx\": 8, \"relation_id\": 10}\n    ]\n  },\n  \"diffusion_steps\": 500,\n  \"chain_frames\": 20\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
             ],
             "body": {
               "mode": "raw",
+              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"standard\",\n  \"task\": \"generate\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 28155, \"type_id\": 1},\n      {\"entity_id\": 29348, \"type_id\": 4},\n      {\"entity_id\": 29358, \"type_id\": 1},\n      {\"entity_id\": 36247, \"type_id\": 1},\n      {\"entity_id\": 36248, \"type_id\": 4},\n      {\"entity_id\": 36855, \"type_id\": 1},\n      {\"entity_id\": 36858, \"type_id\": 4},\n      {\"entity_id\": 36860, \"type_id\": 4},\n      {\"entity_id\": 36881, \"type_id\": 1},\n      {\"entity_id\": 39993, \"type_id\": 1}\n    ],\n    \"edges\": [\n      {\"source_idx\": 0, \"target_idx\": 2, \"relation_id\": 10},\n      {\"source_idx\": 0, \"target_idx\": 4, \"relation_id\": 2},\n      {\"source_idx\": 1, \"target_idx\": 0, \"relation_id\": 0},\n      {\"source_idx\": 1, \"target_idx\": 2, \"relation_id\": 2},\n      {\"source_idx\": 1, \"target_idx\": 3, \"relation_id\": 1},\n      {\"source_idx\": 1, \"target_idx\": 5, \"relation_id\": 9},\n      {\"source_idx\": 1, \"target_idx\": 7, \"relation_id\": 7},\n      {\"source_idx\": 1, \"target_idx\": 8, \"relation_id\": 3},\n      {\"source_idx\": 2, \"target_idx\": 0, \"relation_id\": 7},\n      {\"source_idx\": 2, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 2, \"target_idx\": 4, \"relation_id\": 5},\n      {\"source_idx\": 2, \"target_idx\": 8, \"relation_id\": 10},\n      {\"source_idx\": 2, \"target_idx\": 9, \"relation_id\": 2},\n      {\"source_idx\": 3, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 4, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 5, \"relation_id\": 7},\n      {\"source_idx\": 3, \"target_idx\": 6, \"relation_id\": 6},\n      {\"source_idx\": 3, \"target_idx\": 7, \"relation_id\": 0},\n      {\"source_idx\": 4, \"target_idx\": 2, \"relation_id\": 1},\n      {\"source_idx\": 4, \"target_idx\": 3, \"relation_id\": 6},\n     
 {\"source_idx\": 4, \"target_idx\": 6, \"relation_id\": 7},\n      {\"source_idx\": 4, \"target_idx\": 7, \"relation_id\": 7},\n      {\"source_idx\": 5, \"target_idx\": 4, \"relation_id\": 2},\n      {\"source_idx\": 5, \"target_idx\": 6, \"relation_id\": 2},\n      {\"source_idx\": 5, \"target_idx\": 7, \"relation_id\": 6},\n      {\"source_idx\": 5, \"target_idx\": 8, \"relation_id\": 1},\n      {\"source_idx\": 5, \"target_idx\": 9, \"relation_id\": 1},\n      {\"source_idx\": 6, \"target_idx\": 0, \"relation_id\": 5},\n      {\"source_idx\": 6, \"target_idx\": 3, \"relation_id\": 7},\n      {\"source_idx\": 6, \"target_idx\": 4, \"relation_id\": 3},\n      {\"source_idx\": 6, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 6, \"target_idx\": 7, \"relation_id\": 0},\n      {\"source_idx\": 6, \"target_idx\": 8, \"relation_id\": 1},\n      {\"source_idx\": 6, \"target_idx\": 9, \"relation_id\": 4},\n      {\"source_idx\": 7, \"target_idx\": 2, \"relation_id\": 10},\n      {\"source_idx\": 7, \"target_idx\": 5, \"relation_id\": 5},\n      {\"source_idx\": 7, \"target_idx\": 6, \"relation_id\": 5},\n      {\"source_idx\": 8, \"target_idx\": 0, \"relation_id\": 0},\n      {\"source_idx\": 8, \"target_idx\": 2, \"relation_id\": 6},\n      {\"source_idx\": 8, \"target_idx\": 4, \"relation_id\": 10},\n      {\"source_idx\": 8, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 6, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 7, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 9, \"relation_id\": 0},\n      {\"source_idx\": 9, \"target_idx\": 0, \"relation_id\": 3},\n      {\"source_idx\": 9, \"target_idx\": 1, \"relation_id\": 8},\n      {\"source_idx\": 9, \"target_idx\": 3, \"relation_id\": 5},\n      {\"source_idx\": 9, \"target_idx\": 7, \"relation_id\": 5},\n      {\"source_idx\": 9, \"target_idx\": 8, \"relation_id\": 1}\n    ]\n  },\n  \"diffusion_steps\": 500,\n  
\"chain_frames\": 20\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
           }
         },
         {
+          "name": "POST /kg-anomaly/correct (multiprox init)",
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": ["var body = pm.response.text();", "var lines = body.split('\\n');", "for (var i = 0; i < lines.length; i++) {", "    if (lines[i].trim() === 'event: result' && i + 1 < lines.length) {", "        var dataLine = lines[i + 1].replace(/^data: /, '');", "        try {", "            var result = JSON.parse(dataLine);", "            if (result.state) { pm.collectionVariables.set('multiprox_state', result.state); }", "        } catch (e) {}", "        break;", "    }", "}"]
+              }
+            }
+          ],
           "request": {
             "method": "POST",
             "header": [
             ],
             "body": {
               "mode": "raw",
+              "raw": "{\n  \"dataset_id\": \"wordnet\",\n  \"sampling_mode\": \"multiprox\",\n  \"task\": \"correct\",\n  \"subgraph\": {\n    \"nodes\": [\n      {\"entity_id\": 28155, \"type_id\": 1},\n      {\"entity_id\": 29348, \"type_id\": 4},\n      {\"entity_id\": 29358, \"type_id\": 1},\n      {\"entity_id\": 36247, \"type_id\": 1},\n      {\"entity_id\": 36248, \"type_id\": 4},\n      {\"entity_id\": 36855, \"type_id\": 1},\n      {\"entity_id\": 36858, \"type_id\": 4},\n      {\"entity_id\": 36860, \"type_id\": 4},\n      {\"entity_id\": 36881, \"type_id\": 1},\n      {\"entity_id\": 39993, \"type_id\": 1}\n    ],\n    \"edges\": [\n      {\"source_idx\": 1, \"target_idx\": 2, \"relation_id\": 1},\n      {\"source_idx\": 1, \"target_idx\": 3, \"relation_id\": 1},\n      {\"source_idx\": 2, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 2, \"target_idx\": 4, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 1, \"relation_id\": 1},\n      {\"source_idx\": 3, \"target_idx\": 4, \"relation_id\": 1},\n      {\"source_idx\": 4, \"target_idx\": 2, \"relation_id\": 1},\n      {\"source_idx\": 4, \"target_idx\": 3, \"relation_id\": 1},\n      {\"source_idx\": 5, \"target_idx\": 6, \"relation_id\": 2},\n      {\"source_idx\": 5, \"target_idx\": 7, \"relation_id\": 1},\n      {\"source_idx\": 5, \"target_idx\": 8, \"relation_id\": 5},\n      {\"source_idx\": 5, \"target_idx\": 9, \"relation_id\": 4},\n      {\"source_idx\": 6, \"target_idx\": 4, \"relation_id\": 3},\n      {\"source_idx\": 6, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 6, \"target_idx\": 8, \"relation_id\": 1},\n      {\"source_idx\": 7, \"target_idx\": 5, \"relation_id\": 10},\n      {\"source_idx\": 7, \"target_idx\": 6, \"relation_id\": 10},\n      {\"source_idx\": 8, \"target_idx\": 5, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 6, \"relation_id\": 1},\n      {\"source_idx\": 8, \"target_idx\": 7, \"relation_id\": 1},\n     
 {\"source_idx\": 8, \"target_idx\": 9, \"relation_id\": 1},\n      {\"source_idx\": 9, \"target_idx\": 0, \"relation_id\": 3},\n      {\"source_idx\": 9, \"target_idx\": 6, \"relation_id\": 10},\n      {\"source_idx\": 9, \"target_idx\": 7, \"relation_id\": 3},\n      {\"source_idx\": 9, \"target_idx\": 8, \"relation_id\": 10}\n    ]\n  },\n  \"multiprox_params\": {\n    \"n\": 10,\n    \"m\": 100,\n    \"t\": 0.4,\n    \"t_prime\": 0.1,\n    \"gibbs_chain_freq\": 10\n  }\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/correct",
               "host": ["{{base_url}}"],
               "path": ["kg-anomaly", "correct"]
             },
+            "description": "MultiProx Gibbs init on wordnet correction. SSE stream; the result event's state blob is auto-saved to {{multiprox_state}}."
           }
         },
         {
           "name": "POST /kg-anomaly/continue",
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": ["var body = pm.response.text();", "var lines = body.split('\\n');", "for (var i = 0; i < lines.length; i++) {", "    if (lines[i].trim() === 'event: result' && i + 1 < lines.length) {", "        var dataLine = lines[i + 1].replace(/^data: /, '');", "        try {", "            var result = JSON.parse(dataLine);", "            if (result.state) {", "                pm.collectionVariables.set('multiprox_state', result.state);", "                console.log('State updated (done=' + result.done + ', step=' + result.step + ')');", "            }", "        } catch (e) {}", "        break;", "    }", "}"]
+              }
+            }
+          ],
           "request": {
             "method": "POST",
             "header": [
             ],
             "body": {
               "mode": "raw",
+              "raw": "{\n  \"state\": \"{{multiprox_state}}\"\n}"
             },
             "url": {
               "raw": "{{base_url}}/kg-anomaly/continue",
               "host": ["{{base_url}}"],
               "path": ["kg-anomaly", "continue"]
             },
+            "description": "Advance MultiProx correction by one step. Uses {{multiprox_state}}; can be chained repeatedly."
           }
         }
       ]

src/backend/README.md CHANGED Viewed

@@ -88,17 +88,17 @@ All endpoints are prefixed with `/api/v1/`.
 |---|---|---|
 | `GET` | `/graph-generation/datasets` | List graph types with node/edge types |
 | `GET` | `/graph-generation/sampling-modes` | Sampling strategies with parameter specs |
-| `POST` | `/graph-generation/generate` | **Streaming NDJSON.** Generate a graph (standard denoising or MultiProx Gibbs init) |
-| `POST` | `/graph-generation/continue` | **Streaming NDJSON.** Advance a MultiProx Gibbs session by one step |
 ### KG Anomaly Correction
 | Method | Path | Description |
 |---|---|---|
 | `GET` | `/kg-anomaly/datasets` | List datasets with correction models |
-| `GET` | `/kg-anomaly/datasets/{id}/sample-subgraphs` | Pre-computed example subgraphs (`?count=5`) |
-| `POST` | `/kg-anomaly/correct` | Run correction (not yet implemented) |
-| `POST` | `/kg-anomaly/continue` | Continue MultiProx correction (not yet implemented) |
 ## Streaming Inference Protocol (SSE)

 |---|---|---|
 | `GET` | `/graph-generation/datasets` | List graph types with node/edge types |
 | `GET` | `/graph-generation/sampling-modes` | Sampling strategies with parameter specs |
+| `POST` | `/graph-generation/generate` | **Streaming SSE.** Generate a graph (standard denoising or MultiProx Gibbs init) |
+| `POST` | `/graph-generation/continue` | **Streaming SSE.** Advance a MultiProx Gibbs session by one step |
 ### KG Anomaly Correction
 | Method | Path | Description |
 |---|---|---|
 | `GET` | `/kg-anomaly/datasets` | List datasets with correction models |
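+| `GET` | `/kg-anomaly/datasets/{id}/sample-subgraphs` | Pre-computed example subgraphs (`?count=5&noise_level=0.4&task=correct&seed=42`); when `noise_level` is given the edges are pre-noised task-aware: only the region the model will regenerate for `task=correct`, every edge slot for `task=generate` |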
+| `POST` | `/kg-anomaly/correct` | **Streaming SSE.** Correct/regenerate a KG subgraph (standard denoising or MultiProx Gibbs init) |
+| `POST` | `/kg-anomaly/continue` | **Streaming SSE.** Advance a MultiProx correction session by one step |
 ## Streaming Inference Protocol (SSE)

src/backend/api/services/kg_anomaly_inference.py ADDED Viewed

	@@ -0,0 +1,708 @@

+import base64
+import io
+import time
+import torch
+import torch.nn.functional as F
+from api.services.graphgen_inference import (
+    _frames_to_gif_b64, _pil_to_b64,
+)
# Largest decoded MultiProx state blob that decode_state_blob will accept (10 MB).
STATE_BLOB_MAX_BYTES = 10 * 1024 * 1024

# Keys a decoded MultiProx state dict must contain before it is accepted.
REQUIRED_STATE_KEYS = {
    # graph tensors
    "X_given", "E", "y", "n_nodes", "X_index", "X_c", "is_bip",
    "original_E_int",
    # request context
    "dataset_id", "task",
    # diffusion / Gibbs hyper-parameters
    "T", "n", "m", "t", "t_prime", "gibbs_chain_freq",
    # progress counters
    "inner_step", "step",
}

# Colour used for an edge in a rendered diff, keyed by its change category.
CHANGE_COLORS = dict(
    unchanged="#888888",
    modified="#e67e22",
    added="#27ae60",
    removed="#e74c3c",
)
+# ---------------------------------------------------------------------------
+# Input subgraph -> tensors
+# ---------------------------------------------------------------------------
def build_kg_tensors(subgraph, loader, model):
    """Turn an API subgraph payload into batch-size-1 CPU tensors.

    Args:
        subgraph: dict with ``"nodes"`` (each needs ``entity_id``; ``type_id``
            is optional and defaults to 0) and ``"edges"`` (each needs
            ``source_idx``, ``target_idx`` and ``relation_id``).
        loader: object whose optional ``communities`` attribute maps an entity
            id to a community id.
        model: object exposing ``Xdim_output`` and ``Edim_output``.

    Returns:
        dict with ``X_given``, ``E_given``, ``y_given``, ``X_index``, ``X_c``,
        ``n_nodes``, ``is_bip`` and ``node_mask``.
    """
    node_list = subgraph["nodes"]
    edge_list = subgraph["edges"]
    num_nodes = len(node_list)
    x_dim = model.Xdim_output
    e_dim = model.Edim_output

    # One-hot node types; a type id outside [0, x_dim) falls back to class 0.
    X_given = torch.zeros(1, num_nodes, x_dim, dtype=torch.float32)
    for row, node in enumerate(node_list):
        type_id = int(node.get("type_id", 0))
        column = type_id if 0 <= type_id < x_dim else 0
        X_given[0, row, column] = 1.0

    # Every slot starts as class 0 ("no edge"); relation r is stored as class r + 1.
    E_given = torch.zeros(1, num_nodes, num_nodes, e_dim, dtype=torch.float32)
    E_given[..., 0] = 1.0
    for edge in edge_list:
        src = int(edge["source_idx"])
        tgt = int(edge["target_idx"])
        edge_class = int(edge["relation_id"]) + 1
        endpoints_ok = 0 <= src < num_nodes and 0 <= tgt < num_nodes
        class_ok = 1 <= edge_class < e_dim
        if endpoints_ok and class_ok:
            # Invalid edges are silently dropped; a later duplicate overwrites.
            E_given[0, src, tgt] = 0.0
            E_given[0, src, tgt, edge_class] = 1.0

    X_index = torch.tensor(
        [[int(node["entity_id"]) for node in node_list]], dtype=torch.long)

    # Community id per node; stays 0 when the loader has no communities or the
    # entity id is out of range.
    X_c = torch.zeros(1, num_nodes, dtype=torch.long)
    communities = getattr(loader, "communities", None)
    if communities is not None:
        for row, node in enumerate(node_list):
            entity_id = int(node["entity_id"])
            if 0 <= entity_id < len(communities):
                X_c[0, row] = int(communities[entity_id])

    return {
        "X_given": X_given,
        "E_given": E_given,
        "y_given": torch.zeros(1, 0, dtype=torch.float32),
        "X_index": X_index,
        "X_c": X_c,
        "n_nodes": torch.tensor([num_nodes], dtype=torch.long),
        "is_bip": torch.tensor([num_nodes > 20], dtype=torch.bool),
        "node_mask": torch.ones(1, num_nodes, dtype=torch.bool),
    }
+def _to_device(t, device):
+    return t.to(device) if isinstance(t, torch.Tensor) else t
def apply_edge_noise(model, tensors, task, noise_level, seed=None):
    """Forward-diffuse the given subgraph's edges at t = noise_level * T.

    For task="correct", only edges inside the inpaint mask are noised, matching
    what the correction endpoint will regenerate. For task="generate", every
    edge slot is noised.

    Args:
        model: diffusion model exposing ``T``, ``Edim_output``,
            ``noise_schedule`` and ``transition_model``.
        tensors: dict as returned by ``build_kg_tensors``.
        task: ``"generate"`` or anything else (treated as correction).
        noise_level: fraction of the diffusion horizon, in ``(0, 1]``.
        seed: optional integer; when given the sampling is reproducible and the
            process-wide torch RNG state is left as it was before the call.

    Returns:
        A new list of ``{source_idx, target_idx, relation_id}`` dicts.

    Raises:
        ValueError: if ``noise_level`` is outside ``(0, 1]``.
    """
    # Validate before touching the model or importing the research package.
    if not (0.0 < noise_level <= 1.0):
        raise ValueError("noise_level must be in (0, 1]")

    from graph_generation.src.diffusion import diffusion_utils

    device = next(model.parameters()).device
    X = tensors["X_given"].to(device)
    E = tensors["E_given"].to(device)
    node_mask = tensors["node_mask"].to(device)
    is_bip = tensors["is_bip"].to(device)
    n = int(tensors["n_nodes"].item())

    # Same mask the correction endpoints use, so the demo input matches them.
    inpaint_mask = _build_inpaint_mask(
        task, node_mask, is_bip, model.Edim_output, device)

    T = model.T
    t_int = torch.tensor([[int(noise_level * T)]], dtype=torch.float, device=device)
    t_float = t_int / T
    alpha_t_bar = model.noise_schedule.get_alpha_bar(t_normalized=t_float)
    Qtb = model.transition_model.get_Qt_bar(alpha_t_bar, device=device)
    probX = X @ Qtb.X
    probE = E @ Qtb.E.unsqueeze(1)

    # Seeding the global generator would make every later request in this
    # process deterministic too, so the seed is applied inside a forked RNG
    # scope that is restored on exit.
    seeded = seed is not None
    rng_devices = [device] if device.type == "cuda" else []
    with torch.random.fork_rng(devices=rng_devices, enabled=seeded):
        if seeded:
            torch.manual_seed(int(seed))
        sampled = diffusion_utils.sample_discrete_features(
            probX=probX, probE=probE, node_mask=node_mask)

    E_noised = F.one_hot(sampled.E, num_classes=model.Edim_output).float()
    # Noised values inside the mask, original values everywhere else.
    E_mixed = E_noised * inpaint_mask + E * (~inpaint_mask)
    edge_classes = E_mixed[0].argmax(dim=-1).cpu().tolist()

    edges = []
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            cls = int(edge_classes[i][j])
            if cls == 0:
                # class 0 means "no edge"
                continue
            edges.append({
                "source_idx": i, "target_idx": j, "relation_id": cls - 1,
            })
    return edges
+# ---------------------------------------------------------------------------
+# Change detection
+# ---------------------------------------------------------------------------
def compute_changes(original_E_int, corrected_E_int, num_nodes, loader):
    """Diff two directed edge-class matrices of a KG subgraph.

    ``original_E_int`` and ``corrected_E_int`` are 2-D int tensors (n, n) where
    0 means "no edge" and class ``c >= 1`` stands for relation id ``c - 1``.
    Self-loops and slots that are empty in both matrices are ignored.

    Returns:
        ``{"edges": [...], "summary": {...}}`` where each edge entry carries a
        ``change`` of ``unchanged``, ``added``, ``removed`` or ``modified`` and
        ``summary`` counts the entries per category.
    """
    _, _, inv_relations = loader.dataset.get_inverted_name_maps()

    def _relation_name(relation_id):
        # Fall back to the numeric id when the relation has no known name.
        return str(inv_relations.get(relation_id, relation_id))

    before = original_E_int.cpu().tolist()
    after = corrected_E_int.cpu().tolist()
    summary = dict.fromkeys(("added", "removed", "modified", "unchanged"), 0)
    edges = []

    for src in range(num_nodes):
        for dst in range(num_nodes):
            if src == dst:
                continue
            old_cls = int(before[src][dst])
            new_cls = int(after[src][dst])
            if old_cls == 0 and new_cls == 0:
                continue

            if old_cls == new_cls:
                kind = "unchanged"
            elif old_cls == 0 and new_cls > 0:
                kind = "added"
            elif old_cls > 0 and new_cls == 0:
                kind = "removed"
            else:
                kind = "modified"
            summary[kind] += 1

            entry = {"source_idx": src, "target_idx": dst, "change": kind}
            if kind in ("removed", "modified"):
                entry["original_relation_id"] = old_cls - 1
                entry["original_relation_name"] = _relation_name(old_cls - 1)
            if kind != "removed":
                entry["relation_id"] = new_cls - 1
                entry["relation_name"] = _relation_name(new_cls - 1)
            edges.append(entry)

    return {"edges": edges, "summary": summary}
+# ---------------------------------------------------------------------------
+# Rendering
+# ---------------------------------------------------------------------------
+def _format_entity_label(dataset_id, name):
+    s = str(name)
+    if dataset_id == "freebase":
+        s = s.replace("/m/", "")
+    elif dataset_id == "wordnet":
+        s = s.split(".")[0]
+    else:
+        if "concept" in s:
+            parts = s.split(":")
+            s = parts[-2] if "new" in s and len(parts) >= 2 else parts[-1]
+    if len(s) > 14:
+        s = s[:13] + "…"
+    return s
+def _format_relation_label(dataset_id, name):
+    s = str(name)
+    if dataset_id == "freebase":
+        parts = s.split(".")
+        s = ".".join(["_".join(p.split("/")[-2:]) for p in parts])
+    elif dataset_id == "wordnet":
+        s = s[1:] if s.startswith("_") else s
+    else:
+        if "concept" in s:
+            parts = s.split(":")
+            s = parts[-2] if "new" in s and len(parts) >= 2 else parts[-1]
+    if len(s) > 16:
+        s = s[:15] + "…"
+    return s
def render_kg_subgraph(E_int, num_nodes, X_index, dataset_id, loader, changes=None):
    """Draw a directed KG subgraph onto a 500x500 PIL image.

    Layout comes from networkx, drawing is done with PIL only (matplotlib is
    deliberately avoided, as in graphgen_inference, for Windows thread safety).

    Args:
        E_int: (n, n) integer edge-class tensor; 0 is "no edge", class ``c``
            is relation id ``c - 1``.
        num_nodes: number of nodes to draw.
        X_index: per-node entity ids used to look up node labels.
        dataset_id: selects the label-shortening rules.
        loader: provides ``dataset.get_inverted_name_maps()``.
        changes: optional result of ``compute_changes``; when given, edges are
            coloured by change type and removed edges are drawn dashed.

    Returns:
        The rendered ``PIL.Image``.
    """
    import networkx as nx
    from PIL import Image, ImageDraw, ImageFont

    inv_nodes, _, inv_relations = loader.dataset.get_inverted_name_maps()
    adjacency = E_int.cpu().tolist()
    entity_ids = X_index.cpu().tolist()

    graph = nx.DiGraph()
    graph.add_nodes_from(range(num_nodes))
    for src in range(num_nodes):
        for dst in range(num_nodes):
            if src == dst:
                continue
            edge_class = int(adjacency[src][dst])
            if edge_class > 0:
                graph.add_edge(src, dst, rel=edge_class - 1)
    layout = nx.spring_layout(graph, seed=42)

    # (source, target) -> change type, only populated when a diff is supplied
    change_lookup = {}
    if changes is not None:
        change_lookup = {
            (entry["source_idx"], entry["target_idx"]): entry["change"]
            for entry in changes.get("edges", [])
        }

    canvas = 500
    margin = 50
    scale = (canvas - 2 * margin) / 2
    centre_x = centre_y = canvas / 2
    pixel_pos = {
        node: (centre_x + xy[0] * scale, centre_y + xy[1] * scale)
        for node, xy in layout.items()
    }

    img = Image.new("RGB", (canvas, canvas), "white")
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("arial.ttf", 11)
        small_font = ImageFont.truetype("arial.ttf", 9)
    except (OSError, IOError):
        # Arial is not installed everywhere; use PIL's built-in font instead.
        font = ImageFont.load_default()
        small_font = font

    node_r = 10
    default_color = "#444444"

    # Edges go first so node circles are painted over their endpoints.
    # Removed edges are no longer in the graph but are still drawn (dashed).
    drawable = set(graph.edges())
    drawable.update(
        pair for pair, kind in change_lookup.items() if kind == "removed")

    for src, dst in drawable:
        kind = change_lookup.get((src, dst))
        if changes is not None:
            color = CHANGE_COLORS.get(kind, default_color)
        else:
            color = default_color

        from_x, from_y = pixel_pos[src]
        to_x, to_y = pixel_pos[dst]
        dx, dy = to_x - from_x, to_y - from_y
        dist = max(1.0, (dx * dx + dy * dy) ** 0.5)
        ux, uy = dx / dist, dy / dist
        # Pull both ends in by the node radius so the line stops at the circles.
        sx, sy = from_x + ux * node_r, from_y + uy * node_r
        ex, ey = to_x - ux * node_r, to_y - uy * node_r

        if kind == "removed":
            _draw_dashed(draw, (sx, sy), (ex, ey), color, width=2, dash=6)
        else:
            draw.line([(sx, sy), (ex, ey)], fill=color, width=2)
        _draw_arrowhead(draw, (ex, ey), (ux, uy), color)

        # Only edges present in the graph carry a relation to label.
        if graph.has_edge(src, dst):
            rel_id = graph[src][dst]["rel"]
            rel_name = _format_relation_label(
                dataset_id, inv_relations.get(rel_id, rel_id))
            mid_x, mid_y = (sx + ex) / 2, (sy + ey) / 2
            draw.text((mid_x + 3, mid_y - 5), rel_name, fill=color, font=small_font)

    for node in range(num_nodes):
        x, y = pixel_pos[node]
        draw.ellipse([x - node_r, y - node_r, x + node_r, y + node_r],
                     fill="#2ecc71", outline="#1a7a42")
        # Fall back to the positional index when no entity id is available.
        entity_id = int(entity_ids[node]) if node < len(entity_ids) else node
        label = _format_entity_label(dataset_id, inv_nodes.get(entity_id, entity_id))
        draw.text((x + node_r + 2, y - 6), label, fill="#111111", font=font)
    return img
+def _draw_arrowhead(draw, tip, direction, color):
+    import math
+    ux, uy = direction
+    angle = math.atan2(uy, ux)
+    ah_len = 7
+    ah_angle = math.radians(25)
+    x, y = tip
+    x1 = x - ah_len * math.cos(angle - ah_angle)
+    y1 = y - ah_len * math.sin(angle - ah_angle)
+    x2 = x - ah_len * math.cos(angle + ah_angle)
+    y2 = y - ah_len * math.sin(angle + ah_angle)
+    draw.polygon([(x, y), (x1, y1), (x2, y2)], fill=color)
+def _draw_dashed(draw, start, end, color, width=2, dash=6):
+    x0, y0 = start
+    x1, y1 = end
+    dx, dy = x1 - x0, y1 - y0
+    dist = max(1.0, (dx * dx + dy * dy) ** 0.5)
+    steps = int(dist // dash)
+    ux, uy = dx / dist, dy / dist
+    for k in range(steps):
+        if k % 2 == 1:
+            continue
+        sx = x0 + ux * dash * k
+        sy = y0 + uy * dash * k
+        ex = x0 + ux * dash * min(k + 1, steps)
+        ey = y0 + uy * dash * min(k + 1, steps)
+        draw.line([(sx, sy), (ex, ey)], fill=color, width=width)
+# ---------------------------------------------------------------------------
+# Shared inference helpers
+# ---------------------------------------------------------------------------
def _build_inpaint_mask(task, node_mask, is_bip, E_out_dim, device):
    """Return the boolean mask of edge entries the model may rewrite.

    ``task == "generate"`` yields an all-True mask of shape
    (batch, n, n, E_out_dim); any other task delegates to the research
    package's ``get_inpaint_mask``.
    """
    from graph_generation.src.utils import get_inpaint_mask

    if task != "generate":
        return get_inpaint_mask(node_mask, is_bip, E_out_dim, device)
    batch, width = node_mask.shape
    return torch.ones(
        (batch, width, width, E_out_dim), dtype=torch.bool, device=device)
def _sample_initial_noise_kg(model, node_mask):
    """Sample a starting noise graph from the model's limit distribution."""
    from graph_generation.src.diffusion.diffusion_utils import (
        sample_discrete_feature_noise,
    )

    return sample_discrete_feature_noise(
        limit_dist=model.limit_dist, node_mask=node_mask)
def _collapse_final_kg(model, X, E, y, node_mask):
    """Mask and collapse (X, E, y) and return the node and edge results as long tensors.

    ``model`` is accepted for signature symmetry with the other helpers but is
    not used.
    """
    from graph_generation.src.utils import PlaceHolder

    collapsed = PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse=True)
    node_classes = collapsed.X.long()
    edge_classes = collapsed.E.long()
    return node_classes, edge_classes
+# ---------------------------------------------------------------------------
+# Standard correction / generation
+# ---------------------------------------------------------------------------
def run_standard_correction(model, tensors, dataset_id, task, loader,
                            diffusion_steps, chain_frames):
    """Run standard denoising on a KG subgraph, yielding stream events.

    This is a generator. It yields one ``{"type": "progress", "phase":
    "denoise", ...}`` dict per sampling step (some of them carrying a base64
    ``preview`` image) and finishes with a single ``{"type": "result", ...}``
    dict holding the before/after images, the chain GIF, the edge diff and the
    elapsed time.

    Args:
        model: diffusion model exposing ``T``, ``Edim_output``, ``limit_dist``
            and ``sample_p_zs_given_zt``.
        tensors: dict as returned by ``build_kg_tensors``.
        dataset_id: dataset identifier, used for label formatting.
        task: ``"generate"`` regenerates every edge; any other value keeps the
            given edges outside the inpaint mask.
        loader: dataset loader used for name lookups when rendering.
        diffusion_steps: requested number of sampling steps (>= 1); the model
            horizon ``T`` is strided to approximate it.
        chain_frames: approximate number of preview frames wanted (>= 1).

    Raises:
        ValueError: on first iteration, if ``diffusion_steps`` or
            ``chain_frames`` is smaller than 1.
    """
    # Both values are divisors below; reject them before any work is done
    # instead of surfacing a bare ZeroDivisionError.
    if diffusion_steps < 1:
        raise ValueError("diffusion_steps must be >= 1")
    if chain_frames < 1:
        raise ValueError("chain_frames must be >= 1")

    device = next(model.parameters()).device
    X_given = tensors["X_given"].to(device)
    E_given = tensors["E_given"].to(device)
    y_given = tensors["y_given"].to(device)
    X_index = tensors["X_index"].to(device)
    is_bip = tensors["is_bip"].to(device)
    n_nodes = tensors["n_nodes"].to(device)
    node_mask = tensors["node_mask"].to(device)
    n_max = n_nodes.item()

    inpaint_mask = _build_inpaint_mask(
        task, node_mask, is_bip, model.Edim_output, device)
    original_E_int = E_given[0].argmax(dim=-1).long()  # (n, n)
    original_img = render_kg_subgraph(
        original_E_int, n_max, X_index[0], dataset_id, loader, changes=None)

    model_T = model.T
    step_stride = max(1, model_T // diffusion_steps)
    total_loop_steps = (model_T + step_stride - 1) // step_stride
    frame_interval = max(1, total_loop_steps // chain_frames)

    with torch.no_grad():
        z_T = _sample_initial_noise_kg(model, node_mask)
        if task != "generate":
            # Keep the given edges wherever the mask does not allow rewriting.
            z_T.E = z_T.E * inpaint_mask + E_given * (~inpaint_mask)
        X, E, y = X_given, z_T.E, y_given

        gif_frames = []
        t0 = time.time()
        emitted = 0
        for s_idx in reversed(range(0, model_T, step_stride)):
            t_idx = min(s_idx + step_stride, model_T)
            s_t = (s_idx / model_T) * torch.ones((1, 1), device=device)
            t_t = (t_idx / model_T) * torch.ones((1, 1), device=device)
            sampled_s, discrete_s = model.sample_p_zs_given_zt(
                s_t, t_t, X, E, y, X_index, node_mask, inpaint_mask)
            X, E, y = sampled_s.X, sampled_s.E, sampled_s.y
            emitted += 1

            event = {
                "type": "progress",
                "phase": "denoise",
                "step": emitted,
                "total_steps": total_loop_steps,
                "elapsed_ms": int((time.time() - t0) * 1000),
            }
            # Render on every frame_interval-th step and always on the last one.
            if emitted % frame_interval == 0 or s_idx == 0:
                frame = render_kg_subgraph(
                    discrete_s.E[0].long(), n_max, X_index[0], dataset_id, loader)
                gif_frames.append(frame)
                event["preview"] = _pil_to_b64(frame)
            yield event

        X_final, E_final = _collapse_final_kg(model, X, E, y, node_mask)

    corrected_E_int = E_final[0]
    changes = compute_changes(original_E_int, corrected_E_int, n_max, loader)
    corrected_img = render_kg_subgraph(
        corrected_E_int, n_max, X_index[0], dataset_id, loader, changes=changes)
    elapsed_ms = int((time.time() - t0) * 1000)
    yield {
        "type": "result",
        "original_image": _pil_to_b64(original_img),
        "corrected_image": _pil_to_b64(corrected_img),
        "chain_gif": _frames_to_gif_b64(gif_frames),
        "changes": changes,
        "inference_time_ms": elapsed_ms,
    }
+# ---------------------------------------------------------------------------
+# MultiProx correction / generation
+# ---------------------------------------------------------------------------
def run_multiprox_correction_init(model, tensors, dataset_id, task, loader,
                                  n, m, t, t_prime, gibbs_chain_freq):
    """Initialise a MultiProx session: sample ``m`` noise graphs and pack the state.

    This is a generator. It yields ``{"type": "progress", "phase":
    "noise_init", ...}`` dicts roughly every tenth of the way through the
    sampling and finishes with one ``{"type": "result", ...}`` dict that holds
    the resumable ``state`` (all tensors on CPU), the original image, a preview
    of the median-aggregated ensemble and its diff against the input.

    Args:
        model: diffusion model exposing ``T``, ``Edim_output`` and ``limit_dist``.
        tensors: dict as returned by ``build_kg_tensors``.
        dataset_id: dataset identifier, used for label formatting.
        task: ``"generate"`` or anything else (correction with inpainting).
        loader: dataset loader used for name lookups when rendering.
        n, t, t_prime: MultiProx parameters, stored in the state unchanged.
        m: number of noise samples in the ensemble (>= 1).
        gibbs_chain_freq: inner Gibbs steps performed per continue call (>= 1).

    Raises:
        ValueError: on first iteration, if ``m`` or ``gibbs_chain_freq`` is
            smaller than 1.
    """
    # m == 0 would make torch.stack fail on an empty list, and a non-positive
    # gibbs_chain_freq would create a session that can never advance.
    if m < 1:
        raise ValueError("m must be >= 1")
    if gibbs_chain_freq < 1:
        raise ValueError("gibbs_chain_freq must be >= 1")

    device = next(model.parameters()).device
    X_given = tensors["X_given"].to(device)
    E_given = tensors["E_given"].to(device)
    y_given = tensors["y_given"].to(device)
    X_index = tensors["X_index"].to(device)
    X_c = tensors["X_c"].to(device)
    is_bip = tensors["is_bip"].to(device)
    n_nodes = tensors["n_nodes"].to(device)
    node_mask = tensors["node_mask"].to(device)
    n_max = n_nodes.item()

    inpaint_mask = _build_inpaint_mask(
        task, node_mask, is_bip, model.Edim_output, device)
    original_E_int = E_given[0].argmax(dim=-1).long()
    original_img = render_kg_subgraph(
        original_E_int, n_max, X_index[0], dataset_id, loader, changes=None)

    t0 = time.time()
    progress_every = max(1, m // 10)
    z_samples = []
    with torch.no_grad():
        for i in range(m):
            z_i = _sample_initial_noise_kg(model, node_mask)
            if task != "generate":
                # Keep the given edges wherever the mask does not allow rewriting.
                z_i.E = z_i.E * inpaint_mask + E_given * (~inpaint_mask)
            z_samples.append(z_i)
            if (i + 1) % progress_every == 0 or i == m - 1:
                yield {
                    "type": "progress",
                    "phase": "noise_init",
                    "step": i + 1,
                    "total_steps": m,
                    "elapsed_ms": int((time.time() - t0) * 1000),
                }

        # Stack the samples along a new axis 1
        E_ens = torch.stack([z.E for z in z_samples], dim=1)  # (1, M, n, n, Edim)
        y_ens = torch.stack([z.y for z in z_samples], dim=1)  # (1, M, ydim)

        # Median over the sample axis gives the aggregate used for the preview
        agg_E = torch.median(E_ens, dim=1).values
        agg_y = torch.median(y_ens.float(), dim=1).values
        X_int, E_int = _collapse_final_kg(model, X_given, agg_E, agg_y, node_mask)

    corrected_E_int = E_int[0]
    changes = compute_changes(original_E_int, corrected_E_int, n_max, loader)
    preview_img = render_kg_subgraph(
        corrected_E_int, n_max, X_index[0], dataset_id, loader, changes=changes)
    elapsed_ms = int((time.time() - t0) * 1000)

    state = {
        "X_given": X_given.cpu(),
        "E": E_ens.cpu(),
        "y": y_ens.cpu(),
        "n_nodes": n_nodes.cpu(),
        "dataset_id": dataset_id,
        "task": task,
        "X_index": X_index.cpu(),
        "X_c": X_c.cpu(),
        "is_bip": is_bip.cpu(),
        "original_E_int": original_E_int.cpu(),
        "T": model.T, "n": n, "m": m, "t": t, "t_prime": t_prime,
        "gibbs_chain_freq": gibbs_chain_freq,
        "inner_step": 0, "step": 0,
    }
    yield {
        "type": "result",
        "state": state,
        "original_image": _pil_to_b64(original_img),
        "image": _pil_to_b64(preview_img),
        "changes": changes,
        "inference_time_ms": elapsed_ms,
    }
def run_multiprox_correction_step(model, state, loader):
    """Advance a MultiProx session by one call and yield stream events.

    This is a generator. One call performs up to ``gibbs_chain_freq`` inner
    Gibbs updates (fewer if the current round of ``m`` updates ends sooner),
    then a refinement pass of ``int((t - t_prime) * T) + 1`` sampling steps on
    the median-aggregated ensemble. It yields ``progress`` events with phase
    ``"gibbs"`` (each with a preview) and ``"refine"`` (previews on some
    steps), followed by a single ``result`` event carrying the updated state,
    the rendered diff image, ``round_complete`` and ``done``.

    Args:
        model: diffusion model exposing ``Edim_output``, ``gibbs_fixed_t_2``,
            ``sample_p_zs_given_zt`` and ``apply_noise``.
        state: dict produced by ``run_multiprox_correction_init`` or by a
            previous call of this function.
        loader: dataset loader used for name lookups when rendering.
    """
    device = next(model.parameters()).device
    dataset_id = state["dataset_id"]
    task = state["task"]
    X_given = state["X_given"].to(device)
    E = state["E"].to(device)
    y = state["y"].to(device)
    X_index = state["X_index"].to(device)
    is_bip = state["is_bip"].to(device)
    n_nodes = state["n_nodes"].to(device)
    original_E_int = state["original_E_int"].to(device)
    T = state["T"]
    n = state["n"]
    m = state["m"]
    t = state["t"]
    t_prime = state["t_prime"]
    gibbs_chain_freq = state["gibbs_chain_freq"]
    inner_step = state["inner_step"]
    step = state["step"]

    n_max = int(n_nodes.item())
    node_mask = torch.ones(1, n_max, dtype=torch.bool, device=device)
    inpaint_mask = _build_inpaint_mask(task, node_mask, is_bip, model.Edim_output, device)

    fixed_t_norm = t * torch.ones((1, 1), dtype=torch.float, device=device)
    fixed_s_norm = fixed_t_norm - (1.0 / T)
    # Never run past the end of the current round of m updates.
    steps_this_call = min(gibbs_chain_freq, m - inner_step)

    t0 = time.time()
    with torch.no_grad():
        for i in range(steps_this_call):
            k = inner_step + i
            avg_E = torch.median(E, dim=1).values
            avg_y = torch.median(y.float(), dim=1).values
            denoised, _ = model.sample_p_zs_given_zt(
                fixed_s_norm, fixed_t_norm, X_given, avg_E, avg_y,
                X_index, node_mask, inpaint_mask)

            # Temporarily override the model's Gibbs noise level. The restore
            # sits in a finally block so a failing apply_noise cannot leave the
            # shared model mutated for later requests.
            old_t2 = model.gibbs_fixed_t_2
            model.gibbs_fixed_t_2 = t  # safe: inference lock held by registry
            try:
                noisy = model.apply_noise(
                    denoised.X, denoised.E, denoised.y, node_mask, inpaint_mask,
                    gibbs=True)
            finally:
                model.gibbs_fixed_t_2 = old_t2

            # Replace ensemble member k with the freshly re-noised sample.
            E[:, k] = noisy["E_t"]
            y[:, k] = noisy["y_t"]

            # Preview of the aggregate (median) ensemble state
            prev_E = torch.median(E, dim=1).values
            prev_y = torch.median(y.float(), dim=1).values
            _, prev_Ei = _collapse_final_kg(model, X_given, prev_E, prev_y, node_mask)
            preview_img = render_kg_subgraph(
                prev_Ei[0], n_max, X_index[0], dataset_id, loader)
            yield {
                "type": "progress",
                "phase": "gibbs",
                "step": i + 1,
                "total_steps": steps_this_call,
                "elapsed_ms": int((time.time() - t0) * 1000),
                "preview": _pil_to_b64(preview_img),
            }

        new_inner_step = inner_step + steps_this_call
        round_complete = new_inner_step >= m
        if round_complete:
            # A finished round resets the inner counter and bumps the outer one.
            new_inner_step = 0
            new_step = step + 1
        else:
            new_step = step
        done = round_complete and new_step >= n

        # Refinement pass — always produce a clean render
        P = int((t - t_prime) * T) + 1
        P = max(P, 1)
        refine_preview_interval = max(1, P // 10)
        cur_E = torch.median(E, dim=1).values
        cur_y = torch.median(y.float(), dim=1).values
        cur_X = X_given
        for j in range(P):
            s_ref = (t - (j + 1) / T) * torch.ones((1, 1), dtype=torch.float, device=device)
            t_ref = (t - j / T) * torch.ones((1, 1), dtype=torch.float, device=device)
            sampled, discrete_s = model.sample_p_zs_given_zt(
                s_ref, t_ref, cur_X, cur_E, cur_y, X_index, node_mask, inpaint_mask)
            cur_X, cur_E, cur_y = sampled.X, sampled.E, sampled.y

            event = {
                "type": "progress",
                "phase": "refine",
                "step": j + 1,
                "total_steps": P,
                "elapsed_ms": int((time.time() - t0) * 1000),
            }
            # Attach a preview on every interval-th step and on the last one.
            if (j + 1) % refine_preview_interval == 0 or j == P - 1:
                event["preview"] = _pil_to_b64(render_kg_subgraph(
                    discrete_s.E[0].long(), n_max, X_index[0], dataset_id, loader))
            yield event

        X_int, E_int = _collapse_final_kg(model, cur_X, cur_E, cur_y, node_mask)

    corrected_E_int = E_int[0]
    changes = compute_changes(original_E_int, corrected_E_int, n_max, loader)
    corrected_img = render_kg_subgraph(
        corrected_E_int, n_max, X_index[0], dataset_id, loader, changes=changes)
    elapsed_ms = int((time.time() - t0) * 1000)

    # Only the ensemble tensors and the two counters change between calls.
    updated_state = {
        **state,
        "E": E.cpu(), "y": y.cpu(),
        "step": new_step, "inner_step": new_inner_step,
    }
    yield {
        "type": "result",
        "state": updated_state,
        "image": _pil_to_b64(corrected_img),
        "changes": changes,
        "round_complete": round_complete,
        "done": done,
        "inference_time_ms": elapsed_ms,
    }
+# ---------------------------------------------------------------------------
+# State blob serialisation
+# ---------------------------------------------------------------------------
def encode_state_blob(state):
    """Serialise *state* with ``torch.save`` and return it as base64 ASCII text."""
    with io.BytesIO() as sink:
        torch.save(state, sink)
        payload = sink.getvalue()
    return base64.b64encode(payload).decode("ascii")
def decode_state_blob(b64_str):
    """Decode and validate a client-supplied MultiProx state blob.

    Args:
        b64_str: base64 text as produced by ``encode_state_blob``.

    Returns:
        The decoded state dict, with tensors mapped to CPU.

    Raises:
        ValueError: if the input is not valid base64, exceeds
            ``STATE_BLOB_MAX_BYTES``, cannot be deserialised, is not a dict,
            lacks a required key, or has ``E`` / ``X_given`` of the wrong rank.
    """
    try:
        raw = base64.b64decode(b64_str)
    except (ValueError, TypeError) as exc:
        # binascii.Error is a ValueError; TypeError covers non-string input.
        raise ValueError("state is not valid base64") from exc

    if len(raw) > STATE_BLOB_MAX_BYTES:
        raise ValueError(f"state blob exceeds {STATE_BLOB_MAX_BYTES // (1024 * 1024)} MB limit")

    try:
        # SECURITY: the blob comes from the client. weights_only=True limits
        # unpickling to tensors and plain containers/scalars, so a crafted
        # payload cannot execute arbitrary code during load.
        state = torch.load(io.BytesIO(raw), map_location="cpu", weights_only=True)
    except Exception as exc:
        raise ValueError(f"state could not be deserialized: {exc}") from exc

    if not isinstance(state, dict):
        raise ValueError("state must be a dict")
    missing = REQUIRED_STATE_KEYS - set(state.keys())
    if missing:
        raise ValueError(f"state missing keys: {missing}")
    if not isinstance(state["E"], torch.Tensor) or state["E"].dim() != 5:
        raise ValueError("state['E'] must be a 5-D tensor")
    if not isinstance(state["X_given"], torch.Tensor) or state["X_given"].dim() != 3:
        raise ValueError("state['X_given'] must be a 3-D tensor")
    return state

src/backend/api/services/registry.py CHANGED Viewed

@@ -276,6 +276,7 @@ class ModelRegistry:
         self._coins_experiments = {}  # (dataset_id, algorithm) -> Experiment
         self._coins_loaders = {}      # (dataset_id, seed, leiden_resolution) -> full Loader
         self._graphgen_models = {}    # (dataset_id, model_type) -> loaded eval-mode model
     def force_release_inference_lock(self):
         """Emergency release for a stuck inference lock (e.g. client disconnect)."""
@@ -465,9 +466,12 @@ class ModelRegistry:
                         seed=seed, device="cpu", val_size=0.01, test_size=0.02,
                         community_method="leiden", leiden_resolution=leiden_resolution,
                     )
-                    # Free heavy arrays not needed for discovery endpoints
-                    _free_heavy_arrays(loader)
                     self.loaders[dataset_id] = loader
                     logger.info(
                         "Loader ready for %s: %d entities, %d relations, %d train triples",
                         dataset_id, loader.num_nodes, loader.num_relations, len(loader.train_edge_data),
@@ -885,6 +889,239 @@ class ModelRegistry:
         return _gen()
     # ---- COINs inference ---------------------------------------------------
     def coins_predict(self, dataset_id, algorithm, query_structure_id,

         self._coins_experiments = {}  # (dataset_id, algorithm) -> Experiment
         self._coins_loaders = {}      # (dataset_id, seed, leiden_resolution) -> full Loader
         self._graphgen_models = {}    # (dataset_id, model_type) -> loaded eval-mode model
+        self._kg_anomaly_models = {}  # (dataset_id, task) -> loaded eval-mode model
     def force_release_inference_lock(self):
         """Emergency release for a stuck inference lock (e.g. client disconnect)."""
                         seed=seed, device="cpu", val_size=0.01, test_size=0.02,
                         community_method="leiden", leiden_resolution=leiden_resolution,
                     )
                     self.loaders[dataset_id] = loader
+                    # Share this loader with _load_coins_experiment so experiments for the
+                    # same (dataset, seed, leiden_resolution) reuse it instead of reloading
+                    # the graph. Heavy arrays stay populated — they're needed by full
+                    # experiments (embedder/sampler/ranker) and by KG anomaly inference.
+                    self._coins_loaders[(dataset_id, seed, leiden_resolution)] = loader
                     logger.info(
                         "Loader ready for %s: %d entities, %d relations, %d train triples",
                         dataset_id, loader.num_nodes, loader.num_relations, len(loader.train_edge_data),
         return _gen()
+    # ---- KG anomaly (DiGress KG) inference --------------------------------
+    def _load_kg_anomaly_model(self, dataset_id, task):
+        """Load the DiGress KG checkpoint for (dataset_id, task), cached.
+        The KG checkpoint pickles only ``cfg`` via ``save_hyperparameters('cfg')``,
+        so we must reconstruct ``dataset_infos``, ``extra_features`` and
+        ``domain_features`` before constructing the model. Dims are inferred from
+        state_dict shapes; kg_experiment comes from the matching COINs experiment.
+        Args:
+            dataset_id: Dataset identifier; also used as the checkpoint file stem.
+            task: Endpoint task. ``"correct"`` selects ``<dataset_id>_correct.ckpt``;
+                any other value selects ``<dataset_id>.ckpt``.
+        Returns:
+            The model in eval mode, moved to ``settings.TORCH_DEVICE``. The same
+            instance is returned on later calls with the same (dataset_id, task).
+        Raises:
+            ModelUnavailable: If the checkpoint file does not exist.
+            RuntimeError: If the checkpoint has no ``cfg`` in its hyper-parameters.
+        """
+        key = (dataset_id, task)
+        if key in self._kg_anomaly_models:
+            return self._kg_anomaly_models[key]
+        import torch
+        import torch.nn.parallel.distributed as _ddp_mod
+        suffix = "_correct" if task == "correct" else ""
+        ckpt_path = Path(settings.DIGRESS_KG_DIR) / "checkpoints" / f"{dataset_id}{suffix}.ckpt"
+        if not ckpt_path.exists():
+            from api.exceptions import ModelUnavailable
+            raise ModelUnavailable(f"KG anomaly checkpoint not found: {ckpt_path.name}")
+        logger.info("Loading KG anomaly model: dataset=%s task=%s", dataset_id, task)
+        # Load to CPU with DDP patching (same strategy as _safe_load_lightning_checkpoint).
+        # The class-level patch is temporary and always restored in the finally block.
+        _orig_set = _ddp_mod.DistributedDataParallel.__setstate__
+        _orig_get = _ddp_mod.DistributedDataParallel.__getstate__
+        _ddp_mod.DistributedDataParallel.__setstate__ = lambda self, state: self.__dict__.update(state)
+        _ddp_mod.DistributedDataParallel.__getstate__ = lambda self: self.__dict__
+        try:
+            ckpt = torch.load(str(ckpt_path), map_location="cpu", weights_only=False)
+        finally:
+            _ddp_mod.DistributedDataParallel.__setstate__ = _orig_set
+            _ddp_mod.DistributedDataParallel.__getstate__ = _orig_get
+        hparams = ckpt.get("hyper_parameters", {})
+        cfg = hparams.get("cfg") if isinstance(hparams, dict) else getattr(hparams, "cfg", None)
+        if cfg is None:
+            raise RuntimeError(f"KG anomaly checkpoint {ckpt_path.name} is missing 'cfg' in hyper_parameters")
+        state_dict = ckpt["state_dict"]
+        # Ensure the model's task matches the endpoint task.
+        try:
+            cfg.model.task = task
+        except Exception:
+            # Still best-effort (the config object may refuse assignment), but make
+            # the failure visible: the model then keeps whatever task the checkpoint
+            # was saved with, which may differ from the requested one.
+            logger.warning(
+                "Could not set cfg.model.task=%r for checkpoint %s; keeping checkpoint value",
+                task, ckpt_path.name, exc_info=True,
+            )
+        # Infer dims from state_dict
+        edim_output = state_dict["model.mlp_out_E.2.weight"].shape[0]
+        input_dim_x = state_dict["model.mlp_in_X.0.weight"].shape[1]
+        input_dim_e = state_dict["model.mlp_in_E.0.weight"].shape[1]
+        input_dim_y = state_dict["model.mlp_in_y.0.weight"].shape[1]
+        # Load COINs experiment — needed for kg_experiment and for num_node_types
+        experiment = self._load_coins_experiment(dataset_id, "transe")
+        xdim_output = experiment.loader.num_node_types
+        # Sanity: input_dim_e should equal edim_output (no extra E features for KG)
+        if input_dim_e != edim_output:
+            logger.warning(
+                "Unexpected mlp_in_E dim %d != edim_output %d for %s/%s",
+                input_dim_e, edim_output, dataset_id, task,
+            )
+        # Build mock dataset_infos
+        from graph_generation.src.diffusion.distributions import DistributionNodes
+        from graph_generation.src.diffusion.extra_features import (
+            DummyExtraFeatures, ExtraFeatures,
+        )
+        # max_num_nodes from dataset name (e.g. "freebase_20" -> 20, then *2 per kg_dataset.py)
+        try:
+            base_max = int(cfg.dataset.name.split("_")[-1])
+        except (AttributeError, ValueError):
+            # Name absent or without a numeric suffix: fall back to 20.
+            base_max = 20
+        max_num_nodes = base_max * 2
+        # Histogram for DistributionNodes — uniform over possible node counts
+        n_hist = torch.ones(max_num_nodes + 1)
+        n_hist[:2] = 0  # at least 2 nodes
+        nodes_dist = DistributionNodes(n_hist)
+        # Minimal stand-ins carrying only the attributes assigned below.
+        class _MockDataModule:
+            def __init__(self, kg_experiment, max_num_nodes):
+                self.kg_experiment = kg_experiment
+                self.max_num_nodes = max_num_nodes
+        class _MockDatasetInfos:
+            pass
+        dataset_infos = _MockDatasetInfos()
+        dataset_infos.datamodule = _MockDataModule(experiment, max_num_nodes)
+        dataset_infos.input_dims = {"X": input_dim_x, "E": input_dim_e, "y": input_dim_y}
+        dataset_infos.output_dims = {"X": xdim_output, "E": edim_output, "y": 0}
+        dataset_infos.nodes_dist = nodes_dist
+        dataset_infos.max_n_nodes = max_num_nodes
+        dataset_infos.node_types = torch.ones(xdim_output, dtype=torch.float32)
+        dataset_infos.edge_types = torch.ones(edim_output, dtype=torch.float32)
+        # extra_features per cfg
+        extra_features_type = getattr(cfg.model, "extra_features", None)
+        if cfg.model.type == "discrete" and extra_features_type is not None:
+            extra_features = ExtraFeatures(extra_features_type, dataset_info=dataset_infos)
+        else:
+            extra_features = DummyExtraFeatures()
+        domain_features = DummyExtraFeatures()
+        from diffusion_model_discrete_kg import DiscreteDenoisingDiffusionKG as cls
+        # Disable save_hyperparameters while constructing the model, then restore it.
+        _orig_save = cls.save_hyperparameters
+        cls.save_hyperparameters = lambda self, *a, **kw: None
+        try:
+            model = cls(cfg, dataset_infos, None, None, None, extra_features, domain_features)
+        finally:
+            cls.save_hyperparameters = _orig_save
+        # Non-strict load: key mismatches are only reported at debug level.
+        missing, unexpected = model.load_state_dict(state_dict, strict=False)
+        if missing:
+            logger.debug("KG anomaly state_dict missing keys: %d (e.g. %s)",
+                         len(missing), missing[:3])
+        if unexpected:
+            logger.debug("KG anomaly state_dict unexpected keys: %d (e.g. %s)",
+                         len(unexpected), unexpected[:3])
+        del ckpt
+        model.to(settings.TORCH_DEVICE)
+        model.eval()
+        self._kg_anomaly_models[key] = model
+        logger.info("KG anomaly model ready: dataset=%s task=%s", dataset_id, task)
+        return model
+    def kg_anomaly_correct_stream(self, dataset_id, task, sampling_mode, subgraph,
+                                  diffusion_steps, chain_frames, multiprox_params):
+        """Return a generator of SSE event dicts for /kg-anomaly/correct.
+        The inference lock is taken here, before the generator is returned, and is
+        released by the generator's ``finally`` block. If model loading or tensor
+        building fails, the lock is released immediately and the error re-raised.
+        Args:
+            dataset_id: Dataset whose KG anomaly model should be used.
+            task: Task passed to ``_load_kg_anomaly_model``.
+            sampling_mode: ``"standard"`` runs standard correction; any other value
+                takes the MultiProx initialisation branch.
+            subgraph: Request subgraph, handed to ``build_kg_tensors``.
+            diffusion_steps: Forwarded to ``run_standard_correction`` (standard mode only).
+            chain_frames: Forwarded to ``run_standard_correction`` (standard mode only).
+            multiprox_params: Dict with keys ``n``, ``m``, ``t``, ``t_prime`` and
+                ``gibbs_chain_freq``; only read in the MultiProx branch.
+        Returns:
+            A generator yielding event dicts. Events whose ``"type"`` is ``"result"``
+            are augmented with ``dataset_id``, ``task`` and ``sampling_mode``; in the
+            MultiProx branch they also get ``step``, ``round_complete``, ``done`` and
+            the encoded ``state`` blob.
+        Raises:
+            InferenceBusy: If the inference lock is already held.
+        """
+        from api.exceptions import InferenceBusy
+        from api.services.kg_anomaly_inference import (
+            build_kg_tensors, encode_state_blob,
+            run_multiprox_correction_init, run_standard_correction,
+        )
+        # Non-blocking acquire: a concurrent inference is reported, not waited for.
+        if not self._inference_lock.acquire(blocking=False):
+            raise InferenceBusy()
+        self._inference_lock_owner = f"kg_anomaly_correct {dataset_id}/{task}/{sampling_mode}"
+        try:
+            model = self._load_kg_anomaly_model(dataset_id, task)
+            # .get() yields None when no loader is registered for this dataset.
+            loader = self.loaders.get(dataset_id)
+            tensors = build_kg_tensors(subgraph, loader, model)
+        except Exception:
+            # Setup failed before the generator exists: release the lock here.
+            self._inference_lock_owner = None
+            self._inference_lock.release()
+            raise
+        # NOTE(review): from here on the lock is only released by _gen's finally,
+        # which runs once iteration has started — confirm the caller always
+        # iterates (or closes after starting) the returned generator.
+        def _gen():
+            try:
+                if sampling_mode == "standard":
+                    for event in run_standard_correction(
+                            model, tensors, dataset_id, task, loader,
+                            diffusion_steps, chain_frames):
+                        if event["type"] == "result":
+                            # Tag the terminal event with the request parameters.
+                            event.update({
+                                "dataset_id": dataset_id,
+                                "task": task,
+                                "sampling_mode": sampling_mode,
+                            })
+                        yield event
+                else:
+                    n = multiprox_params["n"]
+                    m = multiprox_params["m"]
+                    t = multiprox_params["t"]
+                    t_prime = multiprox_params["t_prime"]
+                    gibbs_chain_freq = multiprox_params["gibbs_chain_freq"]
+                    for event in run_multiprox_correction_init(
+                            model, tensors, dataset_id, task, loader,
+                            n, m, t, t_prime, gibbs_chain_freq):
+                        if event["type"] == "result":
+                            # Swap the raw state object for its encoded blob so the
+                            # event can be sent to the client and posted back later.
+                            state = event.pop("state")
+                            event.update({
+                                "dataset_id": dataset_id,
+                                "task": task,
+                                "sampling_mode": sampling_mode,
+                                "step": 0,
+                                "round_complete": False,
+                                "done": False,
+                                "state": encode_state_blob(state),
+                            })
+                        yield event
+            finally:
+                self._inference_lock_owner = None
+                self._inference_lock.release()
+        return _gen()
+    def kg_anomaly_continue_stream(self, state_b64):
+        """Return a generator of SSE event dicts for /kg-anomaly/continue.
+        The state blob comes from the client, so its ``dataset_id`` and ``task``
+        fields are validated before the inference lock is taken and before they
+        are used to pick a checkpoint.
+        Args:
+            state_b64: Encoded state blob, as produced by ``encode_state_blob``.
+        Returns:
+            A generator yielding event dicts. Events whose ``"type"`` is ``"result"``
+            are augmented with ``dataset_id``, ``task``, ``step`` and the re-encoded
+            ``state`` blob. The inference lock is released when the generator finishes.
+        Raises:
+            InvalidRequestError: If the blob cannot be decoded, or names a
+                dataset/task with no available checkpoint.
+            InferenceBusy: If the inference lock is already held.
+        """
+        from api.exceptions import InferenceBusy, InvalidRequestError
+        from api.services.kg_anomaly_inference import (
+            decode_state_blob, encode_state_blob, run_multiprox_correction_step,
+        )
+        try:
+            state = decode_state_blob(state_b64)
+        except ValueError as exc:
+            raise InvalidRequestError(str(exc)) from exc
+        # Read and check the routing fields BEFORE acquiring the lock: a lookup
+        # failure after acquisition would otherwise leave the lock held.
+        try:
+            dataset_id = state["dataset_id"]
+            task = state["task"]
+        except (KeyError, TypeError) as exc:
+            raise InvalidRequestError("state is missing 'dataset_id' or 'task'") from exc
+        # Both values end up in a checkpoint filename, so only accept pairs that
+        # the registry already advertises as available.
+        if not isinstance(dataset_id, str) or not isinstance(task, str):
+            raise InvalidRequestError("state 'dataset_id' and 'task' must be strings")
+        if task not in self.kg_anomaly_checkpoints_available.get(dataset_id, []):
+            raise InvalidRequestError(
+                "state references a dataset/task with no available checkpoint")
+        if not self._inference_lock.acquire(blocking=False):
+            raise InferenceBusy()
+        self._inference_lock_owner = f"kg_anomaly_continue {dataset_id}/{task}"
+        try:
+            model = self._load_kg_anomaly_model(dataset_id, task)
+            loader = self.loaders.get(dataset_id)
+        except Exception:
+            # Setup failed before the generator exists: release the lock here.
+            self._inference_lock_owner = None
+            self._inference_lock.release()
+            raise
+        def _gen():
+            try:
+                for event in run_multiprox_correction_step(model, state, loader):
+                    if event["type"] == "result":
+                        # Swap the raw state object for its encoded blob.
+                        updated_state = event.pop("state")
+                        event.update({
+                            "dataset_id": updated_state["dataset_id"],
+                            "task": updated_state["task"],
+                            "step": updated_state["step"],
+                            "state": encode_state_blob(updated_state),
+                        })
+                    yield event
+            finally:
+                self._inference_lock_owner = None
+                self._inference_lock.release()
+        return _gen()
     # ---- COINs inference ---------------------------------------------------
     def coins_predict(self, dataset_id, algorithm, query_structure_id,

src/backend/api/urls.py CHANGED Viewed

@@ -13,7 +13,12 @@ from api.views.graph_generation import (
     GraphGenContinueView, GraphGenDatasetsView, GraphGenGenerateView, GraphGenSamplingModesView,
 )
 from api.views.health import ApiRootView, ForceUnlockView, HealthView, MethodsView
-from api.views.kg_anomaly import KgAnomalyDatasetsView, KgAnomalySampleSubgraphsView
 urlpatterns = [
     # Health & discovery
@@ -37,4 +42,6 @@ urlpatterns = [
     # KG anomaly
     path("kg-anomaly/datasets", KgAnomalyDatasetsView.as_view()),
     path("kg-anomaly/datasets/<str:dataset_id>/sample-subgraphs", KgAnomalySampleSubgraphsView.as_view()),
 ]

     GraphGenContinueView, GraphGenDatasetsView, GraphGenGenerateView, GraphGenSamplingModesView,
 )
 from api.views.health import ApiRootView, ForceUnlockView, HealthView, MethodsView
+from api.views.kg_anomaly import (
+    KgAnomalyContinueView,
+    KgAnomalyCorrectView,
+    KgAnomalyDatasetsView,
+    KgAnomalySampleSubgraphsView,
+)
 urlpatterns = [
     # Health & discovery
     # KG anomaly
     path("kg-anomaly/datasets", KgAnomalyDatasetsView.as_view()),
     path("kg-anomaly/datasets/<str:dataset_id>/sample-subgraphs", KgAnomalySampleSubgraphsView.as_view()),
+    path("kg-anomaly/correct", KgAnomalyCorrectView.as_view()),
+    path("kg-anomaly/continue", KgAnomalyContinueView.as_view()),
 ]

src/backend/api/views/kg_anomaly.py CHANGED Viewed

@@ -1,9 +1,11 @@
 from rest_framework.response import Response
 from rest_framework.views import APIView
-from api.exceptions import NotFoundError
 from api.services.constants import KG_ANOMALY_DATASET_META
 from api.services.registry import ModelRegistry
 class KgAnomalyDatasetsView(APIView):
@@ -34,9 +36,139 @@ class KgAnomalySampleSubgraphsView(APIView):
         count = int(request.query_params.get("count", 5))
         count = max(1, min(10, count))
-        subgraphs = sg_info.subgraphs[:count]
         return Response({
             "dataset_id": dataset_id,
             "subgraphs": subgraphs,
         })

 from rest_framework.response import Response
 from rest_framework.views import APIView
+from api.exceptions import InvalidRequestError, ModelUnavailable, NotFoundError
 from api.services.constants import KG_ANOMALY_DATASET_META
+from api.services.kg_anomaly_inference import apply_edge_noise, build_kg_tensors
 from api.services.registry import ModelRegistry
+from api.views.graph_generation import _streaming_sse_response
 class KgAnomalyDatasetsView(APIView):
         count = int(request.query_params.get("count", 5))
         count = max(1, min(10, count))
+        subgraphs = [dict(sg) for sg in sg_info.subgraphs[:count]]
+        noise_level_raw = request.query_params.get("noise_level")
+        if noise_level_raw is not None:
+            try:
+                noise_level = float(noise_level_raw)
+            except ValueError:
+                raise InvalidRequestError("'noise_level' must be a float in (0, 1]")
+            if not (0.0 < noise_level <= 1.0):
+                raise InvalidRequestError("'noise_level' must be in (0, 1]")
+            task = request.query_params.get("task", "correct")
+            if task not in ("correct", "generate"):
+                raise InvalidRequestError("'task' must be 'correct' or 'generate'")
+            available = registry.kg_anomaly_checkpoints_available.get(dataset_id, [])
+            if task not in available:
+                raise ModelUnavailable(
+                    f"No '{task}' checkpoint available for dataset '{dataset_id}'")
+            seed_raw = request.query_params.get("seed")
+            seed = int(seed_raw) if seed_raw is not None else None
+            loader = registry.loaders[dataset_id]
+            model = registry._load_kg_anomaly_model(dataset_id, task)
+            for i, sg in enumerate(subgraphs):
+                offset_seed = None if seed is None else seed + i
+                tensors = build_kg_tensors(sg, loader, model)
+                sg["edges"] = apply_edge_noise(model, tensors, task, noise_level, offset_seed)
         return Response({
             "dataset_id": dataset_id,
             "subgraphs": subgraphs,
         })
+def _validate_subgraph(subgraph):
+    """Validate the ``subgraph`` request payload.
+    Checks that ``subgraph`` is a dict with a ``nodes`` list of 2 to 20 dicts, each
+    having ``entity_id``, and an ``edges`` list of dicts, each having ``source_idx``,
+    ``target_idx`` and ``relation_id``. Edge indices must be integer-coercible,
+    inside the node range, and must not form a self-loop.
+    Args:
+        subgraph: The decoded ``subgraph`` value from the request body.
+    Returns:
+        None. The function only validates.
+    Raises:
+        InvalidRequestError: On the first problem found.
+    """
+    if not isinstance(subgraph, dict):
+        raise InvalidRequestError("'subgraph' must be an object with 'nodes' and 'edges'")
+    nodes = subgraph.get("nodes")
+    edges = subgraph.get("edges")
+    if not isinstance(nodes, list) or not (2 <= len(nodes) <= 20):
+        raise InvalidRequestError("'subgraph.nodes' must be a list of 2 to 20 items")
+    if not isinstance(edges, list):
+        raise InvalidRequestError("'subgraph.edges' must be a list")
+    n = len(nodes)
+    for i, node in enumerate(nodes):
+        if not isinstance(node, dict) or "entity_id" not in node:
+            raise InvalidRequestError(f"subgraph.nodes[{i}] must have 'entity_id'")
+    for i, e in enumerate(edges):
+        if not isinstance(e, dict):
+            raise InvalidRequestError(f"subgraph.edges[{i}] must be an object")
+        for field in ("source_idx", "target_idx", "relation_id"):
+            if field not in e:
+                raise InvalidRequestError(f"subgraph.edges[{i}] missing '{field}'")
+        # Convert once, and report non-numeric values (e.g. "abc", null) as a
+        # request error instead of letting int() raise out of the view.
+        try:
+            source_idx = int(e["source_idx"])
+            target_idx = int(e["target_idx"])
+        except (TypeError, ValueError, OverflowError) as exc:
+            raise InvalidRequestError(
+                f"subgraph.edges[{i}] 'source_idx' and 'target_idx' must be integers"
+            ) from exc
+        if not (0 <= source_idx < n and 0 <= target_idx < n):
+            raise InvalidRequestError(f"subgraph.edges[{i}] has out-of-range node index")
+        if source_idx == target_idx:
+            raise InvalidRequestError(f"subgraph.edges[{i}] is a self-loop (not allowed)")
+class KgAnomalyCorrectView(APIView):
+    """POST /kg-anomaly/correct: validate the request and stream correction events."""
+    @staticmethod
+    def _number(raw, cast, name):
+        """Coerce ``raw`` with ``cast`` (``int`` or ``float``).
+        Conversion failures (non-numeric strings, null, nested values) are
+        reported as InvalidRequestError naming the offending field.
+        """
+        try:
+            return cast(raw)
+        except (TypeError, ValueError, OverflowError) as exc:
+            raise InvalidRequestError(f"{name} must be a valid {cast.__name__}") from exc
+    def post(self, request):
+        """Validate the body, start the registry stream, and return it as SSE.
+        Body fields read: ``dataset_id``, ``task`` (default ``"correct"``),
+        ``sampling_mode`` (``"standard"`` or ``"multiprox"``), ``subgraph``, and
+        either ``diffusion_steps`` / ``chain_frames`` (standard) or
+        ``multiprox_params`` (multiprox).
+        Raises:
+            InvalidRequestError: On any malformed or out-of-range field.
+            ModelUnavailable: If no checkpoint exists for the dataset/task pair.
+        """
+        data = request.data
+        registry = ModelRegistry.get()
+        dataset_id = data.get("dataset_id")
+        try:
+            known_dataset = dataset_id in KG_ANOMALY_DATASET_META
+        except TypeError:
+            # Unhashable JSON value (list/object) cannot be a dataset id.
+            known_dataset = False
+        if not known_dataset:
+            raise InvalidRequestError(
+                f"Unknown dataset_id '{dataset_id}'. Valid: {list(KG_ANOMALY_DATASET_META)}")
+        task = data.get("task", "correct")
+        if task not in ("correct", "generate"):
+            raise InvalidRequestError("task must be 'correct' or 'generate'")
+        available = registry.kg_anomaly_checkpoints_available.get(dataset_id, [])
+        if task not in available:
+            raise ModelUnavailable(
+                f"No '{task}' checkpoint available for dataset '{dataset_id}'")
+        sampling_mode = data.get("sampling_mode")
+        if sampling_mode not in ("standard", "multiprox"):
+            raise InvalidRequestError("sampling_mode must be 'standard' or 'multiprox'")
+        subgraph = data.get("subgraph")
+        _validate_subgraph(subgraph)
+        if sampling_mode == "standard":
+            # Out-of-range values are clamped rather than rejected.
+            diffusion_steps = min(max(
+                self._number(data.get("diffusion_steps", 500), int, "diffusion_steps"), 50), 1000)
+            chain_frames = min(max(
+                self._number(data.get("chain_frames", 20), int, "chain_frames"), 10), 30)
+            gen = registry.kg_anomaly_correct_stream(
+                dataset_id, task, sampling_mode, subgraph,
+                diffusion_steps, chain_frames, None)
+        else:
+            mp = data.get("multiprox_params")
+            if not mp or not isinstance(mp, dict):
+                raise InvalidRequestError("multiprox_params is required for multiprox sampling_mode")
+            m = self._number(mp.get("m", 100), int, "multiprox_params.m")
+            if not (2 <= m <= 100):
+                raise InvalidRequestError("multiprox_params.m must be in [2, 100]")
+            n = self._number(mp.get("n", 10), int, "multiprox_params.n")
+            if n < 1:
+                raise InvalidRequestError("multiprox_params.n must be >= 1")
+            t = self._number(mp.get("t", 0.5), float, "multiprox_params.t")
+            t_prime = self._number(mp.get("t_prime", 0.1), float, "multiprox_params.t_prime")
+            # NaN fails every comparison, so it is rejected here as well.
+            if not (0 < t_prime <= t <= 1):
+                raise InvalidRequestError(
+                    "multiprox_params must satisfy 0 < t_prime <= t <= 1")
+            # Default frequency is a tenth of m, but never below 1.
+            gibbs_chain_freq = self._number(
+                mp.get("gibbs_chain_freq", max(1, m // 10)), int,
+                "multiprox_params.gibbs_chain_freq")
+            if not (1 <= gibbs_chain_freq <= m):
+                raise InvalidRequestError(
+                    f"multiprox_params.gibbs_chain_freq must be in [1, {m}]")
+            multiprox_params = {
+                "n": n, "m": m, "t": t, "t_prime": t_prime,
+                "gibbs_chain_freq": gibbs_chain_freq,
+            }
+            gen = registry.kg_anomaly_correct_stream(
+                dataset_id, task, sampling_mode, subgraph,
+                None, None, multiprox_params)
+        return _streaming_sse_response(gen)
+class KgAnomalyContinueView(APIView):
+    """POST /kg-anomaly/continue: resume a session from its encoded state blob."""
+    def post(self, request):
+        """Check that ``state`` is a non-empty string, then stream the continuation."""
+        blob = request.data.get("state")
+        has_valid_blob = bool(blob) and isinstance(blob, str)
+        if not has_valid_blob:
+            raise InvalidRequestError("'state' is required and must be a non-empty string")
+        registry = ModelRegistry.get()
+        events = registry.kg_anomaly_continue_stream(blob)
+        return _streaming_sse_response(events)

src/backend/research_api/settings.py CHANGED Viewed

@@ -7,9 +7,10 @@ PROJECT_ROOT = BASE_DIR.parent.parent  # Website root
 # Add research repos to sys.path so their modules can be imported
 _COINS_KG_ROOT = str(PROJECT_ROOT / "src" / "research" / "COINs-KGGeneration")
 _MULTIPROXAN_ROOT = str(PROJECT_ROOT / "src" / "research" / "MultiProxAn")
 _MULTIPROXAN_SRC = str(PROJECT_ROOT / "src" / "research" / "MultiProxAn" / "src")
-for _path in (_COINS_KG_ROOT, _MULTIPROXAN_ROOT, _MULTIPROXAN_SRC):
     if _path not in sys.path:
         sys.path.insert(0, _path)

 # Add research repos to sys.path so their modules can be imported
 _COINS_KG_ROOT = str(PROJECT_ROOT / "src" / "research" / "COINs-KGGeneration")
+_DIGRESS_KG_SRC = str(PROJECT_ROOT / "src" / "research" / "COINs-KGGeneration" / "graph_generation" / "src")
 _MULTIPROXAN_ROOT = str(PROJECT_ROOT / "src" / "research" / "MultiProxAn")
 _MULTIPROXAN_SRC = str(PROJECT_ROOT / "src" / "research" / "MultiProxAn" / "src")
+for _path in (_COINS_KG_ROOT, _DIGRESS_KG_SRC, _MULTIPROXAN_ROOT, _MULTIPROXAN_SRC):
     if _path not in sys.path:
         sys.path.insert(0, _path)