File size: 3,586 Bytes
dc4d6fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
Schema-loader codebook bridge.

When an annotation scheme opts in with ``codebook: true``, its label
list is sourced from the project's mutable codebook instead of (only)
the static YAML ``labels``. Applied once at server start, before
front-end generation, so every downstream generator
(radio/multiselect/span/hierarchical_multiselect) keeps reading
``scheme["labels"]`` unchanged.

Legacy preservation: a config's existing YAML ``labels`` seed the
codebook the first time (so old configs keep working and the codebook
starts populated); thereafter the database is the source of truth.
"""

from __future__ import annotations

import logging
from typing import Any, Dict, List

from potato.codebook import create_code
from potato.codebook.codebook import Codebook
from potato.codebook.service import DuplicateCodeError

logger = logging.getLogger(__name__)


def _label_name(entry: Any) -> str:
    if isinstance(entry, str):
        return entry
    if isinstance(entry, dict):
        return str(entry.get("name") or entry.get("label") or "").strip()
    return str(entry).strip()


def _project_of(config: Dict[str, Any]) -> str:
    return config.get("annotation_task_name") or "default"


def _seed_from_yaml(
    task_dir: str, project: str, yaml_labels: List[Any]
) -> None:
    for entry in yaml_labels or []:
        name = _label_name(entry)
        if not name:
            continue
        try:
            create_code(
                task_dir, project=project, name=name,
                created_by="config")
        except DuplicateCodeError:
            pass  # idempotent: re-seeding an existing code is fine


def apply_codebook_to_schemes(config: Dict[str, Any]) -> None:
    """Mutate ``config['annotation_schemes']`` in place: for every
    scheme with ``codebook: true``, point ``labels`` at the codebook
    (seeding it from the scheme's YAML labels on first run)."""
    schemes = config.get("annotation_schemes") or []
    task_dir = config.get("task_dir", ".")
    project = _project_of(config)

    for scheme in schemes:
        if not isinstance(scheme, dict) or not scheme.get("codebook"):
            continue

        cb = Codebook.load(task_dir, project)
        if cb.is_empty():
            _seed_from_yaml(task_dir, project, scheme.get("labels"))
            cb = Codebook.load(task_dir, project)

        names = cb.labels()
        if names:
            scheme["labels"] = names
            logger.info(
                "Codebook bridge: scheme %r now sources %d label(s) "
                "from the project codebook",
                scheme.get("name"), len(names))


def _icl_sync_listener(task_dir: str, project: str) -> None:
    """Codebook change listener: refresh the *live* server config's
    scheme labels so ICL prompts (built fresh from ``schema['labels']``
    each call) are restricted to the codebook's current set. Refreshing
    the source the prompt is built from *is* the prompt-cache
    invalidation — there is no separate persistent ICL prompt cache.
    """
    try:
        from potato.server_utils import config_module
        cfg = config_module.config
    except Exception:
        return
    if not cfg:
        return
    if (cfg.get("annotation_task_name") or "default") != project:
        return
    apply_codebook_to_schemes(cfg)


def install_codebook_icl_sync() -> None:
    """Register the ICL-sync listener (idempotent). Called at server
    init alongside the other mode initializers."""
    from potato.codebook.service import register_change_listener
    register_change_listener(_icl_sync_listener)