File size: 3,027 Bytes
5a2c6b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import argparse
import json
import urllib.request
from pathlib import Path
from typing import Any


# Parent of the directory that contains this file (resolved to an absolute
# path). main() reads label_config.xml and data/tasks_annotator_*.json
# relative to this directory.
ROOT = Path(__file__).resolve().parents[1]


def request(
    base_url: str,
    token: str,
    method: str,
    path: str,
    payload: dict[str, Any] | list[Any] | None = None,
) -> Any:
    body = None
    headers = {"Authorization": f"Token {token}"}
    if payload is not None:
        body = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"

    req = urllib.request.Request(
        f"{base_url.rstrip('/')}{path}",
        data=body,
        headers=headers,
        method=method,
    )
    with urllib.request.urlopen(req, timeout=120) as resp:
        raw = resp.read().decode("utf-8")
        return json.loads(raw) if raw else None


def list_projects(base_url: str, token: str) -> list[dict[str, Any]]:
    """Return every project the token can see, across all result pages.

    The endpoint may answer with either a bare JSON list or a paginated
    envelope (a dict carrying the items under ``results`` and, when more
    pages exist, a non-empty ``next`` link). Reading only the first envelope
    would silently miss projects on later pages, so pages are requested by
    number until ``next`` is empty.

    Args:
        base_url: Server root URL.
        token: API token forwarded to ``request``.

    Returns:
        A list of project dicts; empty when the response is neither a list
        nor a dict.
    """
    projects: list[dict[str, Any]] = []
    page = 1
    while True:
        # The first call is identical to the unpaginated request so servers
        # that do not understand ``page`` behave exactly as before.
        path = "/api/projects/" if page == 1 else f"/api/projects/?page={page}"
        payload = request(base_url, token, "GET", path)

        if isinstance(payload, list):
            # Unpaginated response: the list is the whole collection.
            projects.extend(payload)
            return projects
        if not isinstance(payload, dict):
            return projects

        results = payload.get("results") or []
        projects.extend(results)
        # Stop on the last page; the empty-page check guards against a server
        # that keeps advertising ``next`` without returning items.
        if not payload.get("next") or not results:
            return projects
        page += 1


def create_project(base_url: str, token: str, title: str, tasks_path: Path, label_config: str) -> None:
    """Create one project and import its tasks from a JSON file.

    The task file is read and parsed before any request is sent. The project
    is then created with a POST to ``/api/projects/``, the parsed tasks are
    posted to that project's import endpoint, and a summary line is printed.

    Args:
        base_url: Server root URL.
        token: API token forwarded to ``request``.
        title: Title of the new project.
        tasks_path: UTF-8 JSON file whose parsed content is sent as the
            import body.
        label_config: Labeling configuration text stored on the project.
    """
    task_items = json.loads(tasks_path.read_text(encoding="utf-8"))

    settings = {
        "title": title,
        "description": (
            "Human evaluation of why generated legal judgments reached the wrong outcome. "
            "The machine RCA is intentionally hidden from annotators."
        ),
        "label_config": label_config,
        "sampling": "Sequential sampling",
        "show_instruction": True,
        "show_skip_button": False,
        "enable_empty_annotation": False,
    }
    created = request(base_url, token, "POST", "/api/projects/", settings)

    # The import endpoint is addressed by the id returned from creation.
    import_path = f"/api/projects/{created['id']}/import?return_task_ids=true"
    request(base_url, token, "POST", import_path, task_items)

    print(f"Created {title} with {len(task_items)} tasks")


def main() -> None:
    """Delete and recreate the three "RCA Validation - Annotator N" projects.

    Parses ``--base-url`` and ``--token`` from the command line, reads
    ``label_config.xml`` under ``ROOT``, deletes every listed project whose
    title matches one of the three annotator titles, then creates each
    project again from ``data/tasks_annotator_<N>.json`` under ``ROOT``.
    """
    cli = argparse.ArgumentParser(description="Recreate the three RCA Label Studio projects.")
    cli.add_argument("--base-url", required=True, help="Label Studio base URL.")
    cli.add_argument("--token", required=True, help="Label Studio API token.")
    options = cli.parse_args()
    base_url, token = options.base_url, options.token

    label_config = (ROOT / "label_config.xml").read_text(encoding="utf-8")

    # Annotator number -> project title, in creation order.
    title_by_annotator = {number: f"RCA Validation - Annotator {number}" for number in (1, 2, 3)}
    stale_titles = set(title_by_annotator.values())

    for existing in list_projects(base_url, token):
        if existing.get("title") not in stale_titles:
            continue
        request(base_url, token, "DELETE", f"/api/projects/{existing['id']}/")
        print(f"Deleted {existing['title']}")

    for number, title in title_by_annotator.items():
        create_project(
            base_url,
            token,
            title,
            ROOT / "data" / f"tasks_annotator_{number}.json",
            label_config,
        )


# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()