# Snapshot metadata from the hosting page: 5a2c6b2, file size 3,027 bytes.
import argparse
import json
import urllib.request
from pathlib import Path
from typing import Any
# Base directory for input files: the parent of the folder that contains this
# script (resolved path, one level above the script's own directory).
ROOT = Path(__file__).resolve().parents[1]
def request(
base_url: str,
token: str,
method: str,
path: str,
payload: dict[str, Any] | list[Any] | None = None,
) -> Any:
body = None
headers = {"Authorization": f"Token {token}"}
if payload is not None:
body = json.dumps(payload).encode("utf-8")
headers["Content-Type"] = "application/json"
req = urllib.request.Request(
f"{base_url.rstrip('/')}{path}",
data=body,
headers=headers,
method=method,
)
with urllib.request.urlopen(req, timeout=120) as resp:
raw = resp.read().decode("utf-8")
return json.loads(raw) if raw else None
def list_projects(base_url: str, token: str) -> list[dict[str, Any]]:
    """Return every project visible to the token, following pagination.

    The first request goes to ``/api/projects/`` exactly as before. When the
    reply is a paginated envelope (a dict with ``results`` and ``next``), the
    following pages are fetched with ``?page=N`` until ``next`` is empty, so
    projects beyond the first page are no longer silently missed.

    Args:
        base_url: Label Studio base URL.
        token: Label Studio API token.

    Returns:
        A list of project dicts. Empty when the reply is neither a dict nor a
        list, or when it carries no results.
    """
    projects: list[dict[str, Any]] = []
    page = 1
    path = "/api/projects/"
    while True:
        payload = request(base_url, token, "GET", path)

        if isinstance(payload, list):
            # Unpaginated reply: the list is the whole collection.
            projects.extend(payload)
            return projects
        if not isinstance(payload, dict):
            return projects

        # ``or []`` also covers an explicit ``"results": null``.
        results = payload.get("results") or []
        projects.extend(results)

        # Stop on the last page; the empty-results check guards against a
        # server that keeps advertising ``next`` without returning data.
        if not results or not payload.get("next"):
            return projects

        page += 1
        path = f"/api/projects/?page={page}"
def create_project(base_url: str, token: str, title: str, tasks_path: Path, label_config: str) -> None:
    """Create one project and import its tasks from a JSON file.

    The tasks file is read first, so an unreadable or invalid file fails
    before anything is created on the server.

    Args:
        base_url: Label Studio base URL.
        token: Label Studio API token.
        title: Title of the project to create.
        tasks_path: UTF-8 JSON file whose content is posted to the import
            endpoint as-is.
        label_config: Labeling configuration sent as ``label_config``.
    """
    with tasks_path.open(encoding="utf-8") as handle:
        task_items = json.load(handle)

    settings = {
        "title": title,
        "description": (
            "Human evaluation of why generated legal judgments reached the wrong outcome. "
            "The machine RCA is intentionally hidden from annotators."
        ),
        "label_config": label_config,
        "sampling": "Sequential sampling",
        "show_instruction": True,
        "show_skip_button": False,
        "enable_empty_annotation": False,
    }
    created = request(base_url, token, "POST", "/api/projects/", settings)

    # The import endpoint is addressed by the id returned from the creation call.
    import_path = f"/api/projects/{created['id']}/import?return_task_ids=true"
    request(base_url, token, "POST", import_path, task_items)

    print(f"Created {title} with {len(task_items)} tasks")
def main() -> None:
    """Delete and recreate the three "RCA Validation - Annotator N" projects.

    Reads ``--base-url`` and ``--token`` from the command line, loads
    ``label_config.xml`` from ROOT, deletes every listed project whose title
    matches one of the three target titles, then creates each project again
    from ``data/tasks_annotator_N.json``.
    """
    cli = argparse.ArgumentParser(description="Recreate the three RCA Label Studio projects.")
    cli.add_argument("--base-url", required=True, help="Label Studio base URL.")
    cli.add_argument("--token", required=True, help="Label Studio API token.")
    options = cli.parse_args()
    base_url, token = options.base_url, options.token

    # Loaded before any deletion so a missing config aborts without side effects.
    label_config = (ROOT / "label_config.xml").read_text(encoding="utf-8")

    title_by_index = {idx: f"RCA Validation - Annotator {idx}" for idx in range(1, 4)}
    stale_titles = set(title_by_index.values())

    for existing in list_projects(base_url, token):
        if existing.get("title") not in stale_titles:
            continue
        request(base_url, token, "DELETE", f"/api/projects/{existing['id']}/")
        print(f"Deleted {existing['title']}")

    for idx, title in title_by_index.items():
        create_project(
            base_url,
            token,
            title,
            ROOT / "data" / f"tasks_annotator_{idx}.json",
            label_config,
        )
# Run the recreation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|