Spaces:
Paused
Paused
"""
Schema-loader codebook bridge.

When an annotation scheme opts in with ``codebook: true``, its label
list is sourced from the project's mutable codebook rather than solely
from the static YAML ``labels``. The bridge is applied at server start,
before front-end generation, and re-applied by the ICL-sync listener
whenever the codebook changes, so every downstream generator
(radio/multiselect/span/hierarchical_multiselect) keeps reading
``scheme["labels"]`` unchanged.

Legacy preservation: a config's existing YAML ``labels`` seed the
codebook the first time (so old configs keep working and the codebook
starts populated); thereafter the database is the source of truth.
"""
| from __future__ import annotations | |
| import logging | |
| from typing import Any, Dict, List | |
| from potato.codebook import create_code | |
| from potato.codebook.codebook import Codebook | |
| from potato.codebook.service import DuplicateCodeError | |
| logger = logging.getLogger(__name__) | |
| def _label_name(entry: Any) -> str: | |
| if isinstance(entry, str): | |
| return entry | |
| if isinstance(entry, dict): | |
| return str(entry.get("name") or entry.get("label") or "").strip() | |
| return str(entry).strip() | |
| def _project_of(config: Dict[str, Any]) -> str: | |
| return config.get("annotation_task_name") or "default" | |
def _seed_from_yaml(
    task_dir: str, project: str, yaml_labels: List[Any]
) -> None:
    """Create one codebook code per named entry in *yaml_labels*.

    Each entry is reduced to a name with ``_label_name``; entries that
    produce an empty name are skipped. Codes are created with
    ``created_by="config"``. A ``DuplicateCodeError`` from
    ``create_code`` is ignored, so seeding a name that already exists
    is a no-op. A falsy *yaml_labels* (``None`` or empty) seeds nothing.
    """
    # Lazy pipeline: names are computed one at a time, in list order,
    # and empty names are dropped before any code is created.
    candidate_names = (_label_name(item) for item in (yaml_labels or []))
    for label in filter(None, candidate_names):
        try:
            create_code(
                task_dir,
                project=project,
                name=label,
                created_by="config",
            )
        except DuplicateCodeError:
            # Already present in the codebook; nothing to do.
            continue
def apply_codebook_to_schemes(config: Dict[str, Any]) -> None:
    """Point codebook-enabled schemes at the project codebook, in place.

    Walks ``config['annotation_schemes']`` and, for every dict scheme
    with a truthy ``codebook`` key, replaces ``scheme['labels']`` with
    the codebook's label list. Other schemes are left untouched, and a
    scheme keeps its existing labels when the codebook yields none.

    The codebook is loaded from ``config['task_dir']`` (default ``"."``)
    under the project key given by ``_project_of``. If it is empty, it
    is first seeded from the scheme's own YAML labels and reloaded.

    NOTE(review): seeding is triggered by the codebook being empty, not
    by a "first run" flag. If this runs again after the codebook has
    been emptied, the scheme's current labels are seeded again --
    confirm that is intended.
    """
    task_dir = config.get("task_dir", ".")
    project = _project_of(config)

    opted_in = (
        candidate
        for candidate in (config.get("annotation_schemes") or [])
        if isinstance(candidate, dict) and candidate.get("codebook")
    )
    for scheme in opted_in:
        codebook = Codebook.load(task_dir, project)
        if codebook.is_empty():
            # Empty codebook: populate it from the YAML labels, then
            # reload so the freshly created codes are visible.
            _seed_from_yaml(task_dir, project, scheme.get("labels"))
            codebook = Codebook.load(task_dir, project)

        names = codebook.labels()
        if not names:
            # Nothing to source from; leave the scheme's labels as-is.
            continue

        scheme["labels"] = names
        logger.info(
            "Codebook bridge: scheme %r now sources %d label(s) "
            "from the project codebook",
            scheme.get("name"), len(names))
def _icl_sync_listener(task_dir: str, project: str) -> None:
    """Codebook change listener: refresh the *live* server config.

    Re-applies the codebook bridge to the running server's config so
    that scheme labels match the codebook's current set. Per the
    original design note, ICL prompts are built fresh from
    ``schema['labels']`` on each call, so refreshing that source *is*
    the prompt-cache invalidation -- there is no separate persistent
    ICL prompt cache.

    Does nothing when the live config cannot be imported, is empty, or
    belongs to a different project than *project*.

    *task_dir* is not used here; ``apply_codebook_to_schemes`` reads
    the task directory from the live config itself.
    """
    try:
        from potato.server_utils import config_module
        cfg = config_module.config
    except Exception:
        # Best-effort: with no server config there is nothing to sync.
        # Leave a debug trace rather than failing silently.
        logger.debug(
            "Codebook ICL sync skipped: live server config unavailable",
            exc_info=True)
        return
    if not cfg:
        return
    # Use the shared helper so the project-key fallback cannot drift
    # from the one apply_codebook_to_schemes uses.
    if _project_of(cfg) != project:
        return
    apply_codebook_to_schemes(cfg)
def install_codebook_icl_sync() -> None:
    """Register ``_icl_sync_listener`` as a codebook change listener.

    Called at server init alongside the other mode initializers.

    NOTE(review): the original docstring describes this as idempotent.
    That depends on ``register_change_listener`` ignoring repeat
    registrations, which is not visible here -- confirm.
    """
    # Imported at call time rather than at module import.
    from potato.codebook.service import (
        register_change_listener as _register_listener,
    )

    _register_listener(_icl_sync_listener)