Andrej Janchevski committed on
Commit ·
ee3e8fe
1
Parent(s): 0f7b533
feat(coins): add optional seed param and label fields to sample-triples
Browse files
- registry.sample_triples now accepts seed= for deterministic sampling
via random.Random(seed); omitted seed preserves prior random behaviour
- Each head/relation/tail gains a dataset-cleaned label alongside id and
name (NELL strips concept:, Freebase strips /m/, WordNet drops POS
suffix) so the frontend can show display-friendly strings without
duplicating the cleanup logic
- OpenAPI, Postman collection, and backend README updated in lockstep
- Enables a day-stable NELL Fact of the Day widget seeded by ISO date
- docs/api.yaml +19 -0
- docs/postman/collection.json +4 -3
- src/backend/README.md +1 -1
- src/backend/api/services/registry.py +16 -6
- src/backend/api/views/coins.py +4 -1
docs/api.yaml
CHANGED
|
@@ -169,6 +169,17 @@ paths:
|
|
| 169 |
maximum: 50
|
| 170 |
default: 10
|
| 171 |
description: Number of random triples to return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
responses:
|
| 173 |
"200":
|
| 174 |
description: Sample triples
|
|
@@ -844,6 +855,10 @@ components:
|
|
| 844 |
name:
|
| 845 |
type: string
|
| 846 |
example: dog's_breakfast.n.01
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
|
| 848 |
CoinsRelationsResponse:
|
| 849 |
type: object
|
|
@@ -872,6 +887,10 @@ components:
|
|
| 872 |
name:
|
| 873 |
type: string
|
| 874 |
example: _hypernym
|
|
|
|
|
|
|
|
|
|
|
|
|
| 875 |
|
| 876 |
CoinsSampleTriplesResponse:
|
| 877 |
type: object
|
|
|
|
| 169 |
maximum: 50
|
| 170 |
default: 10
|
| 171 |
description: Number of random triples to return
|
| 172 |
+
- name: seed
|
| 173 |
+
in: query
|
| 174 |
+
required: false
|
| 175 |
+
schema:
|
| 176 |
+
type: string
|
| 177 |
+
description: |
|
| 178 |
+
Optional sampling seed. When provided, sampling is deterministic —
|
| 179 |
+
the same `(dataset_id, count, seed)` always yields the same triples.
|
| 180 |
+
Useful for day-stable "fact of the day" widgets (e.g. seed by the
|
| 181 |
+
ISO date). When omitted, sampling is fully random.
|
| 182 |
+
example: "2026-04-15"
|
| 183 |
responses:
|
| 184 |
"200":
|
| 185 |
description: Sample triples
|
|
|
|
| 855 |
name:
|
| 856 |
type: string
|
| 857 |
example: dog's_breakfast.n.01
|
| 858 |
+
label:
|
| 859 |
+
type: string
|
| 860 |
+
description: Dataset-specific short, display-friendly form of `name`. Only present on sample-triples responses.
|
| 861 |
+
example: dog's_breakfast
|
| 862 |
|
| 863 |
CoinsRelationsResponse:
|
| 864 |
type: object
|
|
|
|
| 887 |
name:
|
| 888 |
type: string
|
| 889 |
example: _hypernym
|
| 890 |
+
label:
|
| 891 |
+
type: string
|
| 892 |
+
description: Dataset-specific short, display-friendly form of `name`. Only present on sample-triples responses.
|
| 893 |
+
example: hypernym
|
| 894 |
|
| 895 |
CoinsSampleTriplesResponse:
|
| 896 |
type: object
|
docs/postman/collection.json
CHANGED
|
@@ -135,14 +135,15 @@
|
|
| 135 |
"method": "GET",
|
| 136 |
"header": [],
|
| 137 |
"url": {
|
| 138 |
-
"raw": "{{base_url}}/coins/datasets/wordnet/sample-triples?count=5",
|
| 139 |
"host": ["{{base_url}}"],
|
| 140 |
"path": ["coins", "datasets", "wordnet", "sample-triples"],
|
| 141 |
"query": [
|
| 142 |
-
{ "key": "count", "value": "5" }
|
|
|
|
| 143 |
]
|
| 144 |
},
|
| 145 |
-
"description": "Random sample triples from the dataset."
|
| 146 |
}
|
| 147 |
},
|
| 148 |
{
|
|
|
|
| 135 |
"method": "GET",
|
| 136 |
"header": [],
|
| 137 |
"url": {
|
| 138 |
+
"raw": "{{base_url}}/coins/datasets/wordnet/sample-triples?count=5&seed=2026-04-15",
|
| 139 |
"host": ["{{base_url}}"],
|
| 140 |
"path": ["coins", "datasets", "wordnet", "sample-triples"],
|
| 141 |
"query": [
|
| 142 |
+
{ "key": "count", "value": "5" },
|
| 143 |
+
{ "key": "seed", "value": "2026-04-15", "description": "Optional. When provided, sampling is deterministic (same seed + count ⇒ same triples). Omit for random." }
|
| 144 |
]
|
| 145 |
},
|
| 146 |
+
"description": "Random sample triples from the dataset. Each triple has head/relation/tail entries with { id, name, label }; `label` is a dataset-specific display-friendly form of `name` (NELL strips `concept:` prefixes, Freebase strips `/m/`, WordNet drops the POS suffix). Pass an optional `seed` (any string) for deterministic sampling — e.g. seed by today's ISO date for a day-stable 'fact of the day' widget."
|
| 147 |
}
|
| 148 |
},
|
| 149 |
{
|
src/backend/README.md
CHANGED
|
@@ -77,7 +77,7 @@ All endpoints are prefixed with `/api/v1/`.
|
|
| 77 |
| `GET` | `/coins/datasets` | List datasets with entity/relation counts |
|
| 78 |
| `GET` | `/coins/datasets/{id}/entities` | Paginated entity search (`?q=&page=&page_size=`) |
|
| 79 |
| `GET` | `/coins/datasets/{id}/relations` | Paginated relation search (`?q=&page=&page_size=`) |
|
| 80 |
-
| `GET` | `/coins/datasets/{id}/sample-triples` | Random training triples (`?count=10`) |
|
| 81 |
| `GET` | `/coins/models` | Available algorithms + supported query structures |
|
| 82 |
| `GET` | `/coins/query-structures` | Query graph templates for frontend rendering |
|
| 83 |
| `POST` | `/coins/predict` | Run link prediction / query answering |
|
|
|
|
| 77 |
| `GET` | `/coins/datasets` | List datasets with entity/relation counts |
|
| 78 |
| `GET` | `/coins/datasets/{id}/entities` | Paginated entity search (`?q=&page=&page_size=`) |
|
| 79 |
| `GET` | `/coins/datasets/{id}/relations` | Paginated relation search (`?q=&page=&page_size=`) |
|
| 80 |
+
| `GET` | `/coins/datasets/{id}/sample-triples` | Random training triples (`?count=10&seed=...`); optional `seed` makes sampling deterministic (same `seed+count` ⇒ same triples, e.g. seed by ISO date for a day-stable widget). Head/relation/tail each carry a dataset-cleaned `label` alongside `id`, `name` |
|
| 81 |
| `GET` | `/coins/models` | Available algorithms + supported query structures |
|
| 82 |
| `GET` | `/coins/query-structures` | Query graph templates for frontend rendering |
|
| 83 |
| `POST` | `/coins/predict` | Run link prediction / query answering |
|
src/backend/api/services/registry.py
CHANGED
|
@@ -8,6 +8,7 @@ import yaml
|
|
| 8 |
from django.conf import settings
|
| 9 |
|
| 10 |
from api.services.constants import COINS_CONFIG_SUFFIX, COINS_DATASET_META
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
@@ -530,8 +531,13 @@ class ModelRegistry:
|
|
| 530 |
start = (max(1, page) - 1) * page_size
|
| 531 |
return items[start:start + page_size], total
|
| 532 |
|
| 533 |
-
def sample_triples(self, dataset_id, count=10):
|
| 534 |
-
"""Return random triples with resolved entity/relation names.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
loader = self.loaders.get(dataset_id)
|
| 536 |
if loader is None:
|
| 537 |
return []
|
|
@@ -539,16 +545,20 @@ class ModelRegistry:
|
|
| 539 |
edge_data = loader.train_edge_data
|
| 540 |
count = min(count, len(edge_data))
|
| 541 |
|
| 542 |
-
|
|
|
|
| 543 |
|
| 544 |
result = []
|
| 545 |
for i in indices:
|
| 546 |
row = edge_data.iloc[i]
|
| 547 |
h, r, t = int(row.s), int(row.r), int(row.t)
|
|
|
|
|
|
|
|
|
|
| 548 |
result.append({
|
| 549 |
-
"head": {"id": h, "name":
|
| 550 |
-
"relation": {"id": r, "name":
|
| 551 |
-
"tail": {"id": t, "name":
|
| 552 |
})
|
| 553 |
return result
|
| 554 |
|
|
|
|
| 8 |
from django.conf import settings
|
| 9 |
|
| 10 |
from api.services.constants import COINS_CONFIG_SUFFIX, COINS_DATASET_META
|
| 11 |
+
from api.utils import clean_entity_name, clean_relation_name
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
|
|
| 531 |
start = (max(1, page) - 1) * page_size
|
| 532 |
return items[start:start + page_size], total
|
| 533 |
|
| 534 |
+
def sample_triples(self, dataset_id, count=10, seed=None):
|
| 535 |
+
"""Return random triples with resolved entity/relation names.
|
| 536 |
+
|
| 537 |
+
When ``seed`` is provided, sampling is deterministic — the same
|
| 538 |
+
``(dataset_id, count, seed)`` always yields the same triples. When
|
| 539 |
+
``seed`` is None, uses the global RNG.
|
| 540 |
+
"""
|
| 541 |
loader = self.loaders.get(dataset_id)
|
| 542 |
if loader is None:
|
| 543 |
return []
|
|
|
|
| 545 |
edge_data = loader.train_edge_data
|
| 546 |
count = min(count, len(edge_data))
|
| 547 |
|
| 548 |
+
rng = random.Random(seed) if seed is not None else random
|
| 549 |
+
indices = rng.sample(range(len(edge_data)), count)
|
| 550 |
|
| 551 |
result = []
|
| 552 |
for i in indices:
|
| 553 |
row = edge_data.iloc[i]
|
| 554 |
h, r, t = int(row.s), int(row.r), int(row.t)
|
| 555 |
+
h_name = str(inv_nodes.get(h, h))
|
| 556 |
+
r_name = str(inv_relations.get(r, r))
|
| 557 |
+
t_name = str(inv_nodes.get(t, t))
|
| 558 |
result.append({
|
| 559 |
+
"head": {"id": h, "name": h_name, "label": clean_entity_name(h_name, dataset_id)},
|
| 560 |
+
"relation": {"id": r, "name": r_name, "label": clean_relation_name(r_name, dataset_id)},
|
| 561 |
+
"tail": {"id": t, "name": t_name, "label": clean_entity_name(t_name, dataset_id)},
|
| 562 |
})
|
| 563 |
return result
|
| 564 |
|
src/backend/api/views/coins.py
CHANGED
|
@@ -78,9 +78,12 @@ class CoinsSampleTriplesView(APIView):
|
|
| 78 |
count = int(request.query_params.get("count", 10))
|
| 79 |
count = max(1, min(50, count))
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
return Response({
|
| 82 |
"dataset_id": dataset_id,
|
| 83 |
-
"triples": registry.sample_triples(dataset_id, count),
|
| 84 |
})
|
| 85 |
|
| 86 |
|
|
|
|
| 78 |
count = int(request.query_params.get("count", 10))
|
| 79 |
count = max(1, min(50, count))
|
| 80 |
|
| 81 |
+
seed_raw = request.query_params.get("seed")
|
| 82 |
+
seed = seed_raw if seed_raw not in (None, "") else None
|
| 83 |
+
|
| 84 |
return Response({
|
| 85 |
"dataset_id": dataset_id,
|
| 86 |
+
"triples": registry.sample_triples(dataset_id, count, seed=seed),
|
| 87 |
})
|
| 88 |
|
| 89 |
|