# Page-scrape residue (not part of the program): "Spaces: Sleeping",
# "File size: 3,402 Bytes", revision "fe3046d", followed by a line-number
# gutter running from 1 to 113.
import json
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
# Base address that every API request in this script is sent to.
BASE_URL = "http://127.0.0.1:7860"
# Directory holding label_config.xml and data/tasks_annotator_<n>.json.
ROOT = Path("/opt/rca_label_studio")
# Marker file under LABEL_STUDIO_BASE_DATA_DIR (default "/data"); main() skips
# project creation when it exists and writes it once bootstrapping finishes.
MARKER = Path(os.getenv("LABEL_STUDIO_BASE_DATA_DIR", "/data")) / ".rca_projects_bootstrapped"
def request(method: str, path: str, payload: dict | list | None = None) -> tuple[int, dict | list | str]:
    """Send one token-authenticated HTTP request to ``BASE_URL`` + *path*.

    The token comes from ``LABEL_STUDIO_USER_TOKEN`` (default
    ``"rca-admin-token"``). When *payload* is not ``None`` it is sent as a
    UTF-8 encoded JSON body with a matching ``Content-Type`` header.

    Returns:
        ``(status, content)``. *content* is the parsed JSON value, the raw
        response text when the body is not valid JSON, or ``""`` when the
        body is empty.

    Errors raised by ``urllib.request.urlopen`` (30 second timeout) are not
    caught here and propagate to the caller.
    """
    api_token = os.getenv("LABEL_STUDIO_USER_TOKEN", "rca-admin-token")
    request_headers = {"Authorization": f"Token {api_token}"}

    encoded = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
        request_headers["Content-Type"] = "application/json"

    http_request = urllib.request.Request(
        f"{BASE_URL}{path}",
        data=encoded,
        headers=request_headers,
        method=method,
    )

    with urllib.request.urlopen(http_request, timeout=30) as response:
        text = response.read().decode("utf-8")
        status = response.status

    if text == "":
        return status, ""
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Not JSON: hand the raw text back unchanged.
        return status, text
    return status, parsed
def wait_for_server() -> None:
    """Block until the API answers ``GET /api/projects/`` or 180 seconds pass.

    The endpoint is polled with a 2 second pause after each failed attempt.
    Any exception raised by the request is treated as "not ready yet" and
    retried.

    Raises:
        RuntimeError: if no attempt succeeds within 180 seconds. The last
            request failure is attached as the exception's cause so the
            reason the server was considered unavailable is not lost.
    """
    # Monotonic clock: the deadline must not shift if the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + 180
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        try:
            request("GET", "/api/projects/")
        except Exception as exc:
            # Deliberately broad: a starting server can fail in many ways.
            # Remember the failure instead of discarding it.
            last_error = exc
            time.sleep(2)
        else:
            return
    raise RuntimeError("Label Studio did not become ready within 180 seconds") from last_error
def existing_project_titles() -> set[str]:
    """Collect the titles of projects returned by ``GET /api/projects/``.

    The response is accepted either as a dict carrying the project list
    under ``"results"`` or as a bare list; any other shape yields an empty
    set. Entries that are not dicts are ignored, and an entry without a
    ``"title"`` key contributes ``""``. Only this single response is
    inspected; no further requests are made.
    """
    _status, response = request("GET", "/api/projects/")

    if isinstance(response, list):
        entries = response
    elif isinstance(response, dict):
        entries = response.get("results", [])
    else:
        entries = []

    found: set[str] = set()
    for entry in entries:
        if isinstance(entry, dict):
            found.add(str(entry.get("title", "")))
    return found
def create_project(title: str, tasks_path: Path, label_config: str) -> None:
    """Create one project and import its tasks.

    Args:
        title: Title of the project to create.
        tasks_path: JSON file whose parsed content is posted to the
            project's import endpoint.
        label_config: Labeling configuration sent as ``label_config``.

    Raises:
        Any error from reading *tasks_path* or from the API requests. If the
        task import fails, the just-created project is deleted (best effort)
        before the error is re-raised. Otherwise an empty project with this
        title would remain, and main() skips titles that already exist, so a
        later run would never import its tasks.
    """
    with tasks_path.open("r", encoding="utf-8") as f:
        tasks = json.load(f)
    description = (
        "Validate the machine RCA for generated legal judgments. "
        "Choose the earliest meaningful FIRAC stage, major error category, "
        "and one exact minor error category."
    )
    _, project = request(
        "POST",
        "/api/projects/",
        {
            "title": title,
            "description": description,
            "label_config": label_config,
            "sampling": "Sequential sampling",
            "show_instruction": True,
            "show_skip_button": False,
            "enable_empty_annotation": False,
        },
    )
    project_id = project["id"]
    try:
        request(
            "POST",
            f"/api/projects/{project_id}/import?return_task_ids=true",
            tasks,
        )
    except Exception:
        # Roll back the half-created project so a retry starts clean.
        try:
            request("DELETE", f"/api/projects/{project_id}/")
        except Exception as cleanup_error:
            # Cleanup is best effort; report it and surface the import error.
            print(
                f"Could not remove incomplete project {title} "
                f"(id {project_id}): {cleanup_error}",
                flush=True,
            )
        raise
    print(f"Bootstrapped {title} with {len(tasks)} tasks", flush=True)
def main() -> None:
    """Bootstrap the three annotator projects once per data directory.

    Waits for the server, then returns early if the marker file exists.
    Otherwise reads ``label_config.xml`` from ROOT, creates each
    "RCA Validation - Annotator <n>" project (n = 1..3) whose title is not
    already present, and finally writes the current timestamp to the marker
    file.
    """
    wait_for_server()

    if MARKER.exists():
        print("RCA Label Studio bootstrap marker exists; skipping project creation.", flush=True)
        return

    config_xml = (ROOT / "label_config.xml").read_text(encoding="utf-8")
    known_titles = existing_project_titles()

    for annotator in (1, 2, 3):
        project_title = f"RCA Validation - Annotator {annotator}"
        if project_title not in known_titles:
            tasks_file = ROOT / "data" / f"tasks_annotator_{annotator}.json"
            create_project(project_title, tasks_file, config_xml)
        else:
            print(f"{project_title} already exists; skipping.", flush=True)

    # Written only after every project was handled without an error.
    MARKER.write_text(str(time.time()), encoding="utf-8")
# Run the bootstrap only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Stray table delimiter "|" left over from the page extraction.