File size: 3,107 Bytes
dc4d6fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Codebook read-model.

A thin, read-only view over the `codes` table for one project: builds
the code tree and the flat label list the schema-loader bridge needs.
Mutations go through `service.py` (the single audited write path).
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from potato.codebook import store


class Codebook:
    """In-memory snapshot of a project's codebook.

    Construct via `Codebook.load(task_dir, project)`. Cheap to rebuild;
    callers reload after a mutation rather than mutating the snapshot.

    Each code is a plain dict. The methods below read the keys `id`,
    `parent_id`, `sort_order`, `name` and `color`; `load` additionally
    looks at the optional `archived_at` key.
    """

    def __init__(self, project: str, codes: List[Dict[str, Any]]):
        """Wrap an already-fetched list of code dicts for *project*.

        The list is stored as given (not copied). An id -> code index is
        built up front; if two codes share an `id`, the later one wins
        in that index.
        """
        self.project = project
        self._codes = codes
        self._by_id: Dict[str, Dict[str, Any]] = {c["id"]: c for c in codes}

    @classmethod
    def load(cls, task_dir: str, project: str) -> "Codebook":
        """Build a snapshot from `store.list_codes(task_dir, project)`,
        dropping every code that has a truthy `archived_at`."""
        # Archived codes (e.g. merged away in Phase 2 C) must not reach
        # the label list / tree — those feed the ICL prompt and the live
        # forms. `.get` keeps this tolerant of pre-0003 schemas.
        codes = [c for c in store.list_codes(task_dir, project)
                 if not c.get("archived_at")]
        return cls(project, codes)

    def __len__(self) -> int:
        """Number of codes held in the snapshot."""
        return len(self._codes)

    def is_empty(self) -> bool:
        """True when the snapshot holds no codes."""
        return not self._codes

    def get(self, code_id: str) -> Optional[Dict[str, Any]]:
        """Return the code dict with id *code_id*, or None if absent."""
        return self._by_id.get(code_id)

    def children(self, parent_id: str = store.ROOT) -> List[Dict[str, Any]]:
        """Return the codes whose `parent_id` equals *parent_id*.

        The result is a new list sorted by (`sort_order`, `name`).
        Defaults to the top-level codes (parent `store.ROOT`).
        """
        kids = [c for c in self._codes if c["parent_id"] == parent_id]
        kids.sort(key=lambda c: (c["sort_order"], c["name"]))
        return kids

    def _tree_order(self) -> List[Dict[str, Any]]:
        """Return the codes reachable from `store.ROOT` in tree order.

        Tree order is depth-first, parent before its children, siblings
        in `children()` order. Codes whose parent chain does not lead
        back to `store.ROOT` are never visited and so are omitted.
        """
        ordered: List[Dict[str, Any]] = []

        def walk(parent: str) -> None:
            for c in self.children(parent):
                ordered.append(c)
                walk(c["id"])

        walk(store.ROOT)
        return ordered

    def labels(self) -> List[str]:
        """Flat list of code names in tree order — the legacy label list
        the radio/multiselect/span loaders consume when a scheme opts in
        via `codebook: true`.

        Names are not de-duplicated: a name used by several codes
        appears once per code.
        """
        return [c["name"] for c in self._tree_order()]

    def label_to_id(self) -> Dict[str, str]:
        """Map code name -> code id (first occurrence in tree order).
        Lets the annotation pipeline store a parallel `code_id` while
        keeping the legacy `label` string.

        When several codes share a name, the id kept is that of the
        code `labels()` lists first.
        """
        mapping: Dict[str, str] = {}
        for c in self._tree_order():
            # Take the id from the walked node itself. Looking the name
            # up again in `self._codes` would resolve duplicates by
            # storage order rather than tree order, and could select a
            # code that is not reachable from the root at all.
            mapping.setdefault(c["name"], c["id"])
        return mapping

    def as_tree(self) -> List[Dict[str, Any]]:
        """Nested [{id,name,color,children:[...]}] for the codebook UI.

        Top-level entries are the children of `store.ROOT`; each level
        is ordered as `children()` returns it.
        """

        def node(c: Dict[str, Any]) -> Dict[str, Any]:
            return {
                "id": c["id"],
                "name": c["name"],
                "color": c["color"],
                "children": [node(k) for k in self.children(c["id"])],
            }

        return [node(c) for c in self.children(store.ROOT)]