import json import os import time import urllib.error import urllib.request from pathlib import Path BASE_URL = "http://127.0.0.1:7860" ROOT = Path("/opt/rca_label_studio") MARKER = Path(os.getenv("LABEL_STUDIO_BASE_DATA_DIR", "/data")) / ".rca_projects_bootstrapped" def request(method: str, path: str, payload: dict | list | None = None) -> tuple[int, dict | list | str]: token = os.getenv("LABEL_STUDIO_USER_TOKEN", "rca-admin-token") body = None headers = {"Authorization": f"Token {token}"} if payload is not None: body = json.dumps(payload).encode("utf-8") headers["Content-Type"] = "application/json" req = urllib.request.Request( f"{BASE_URL}{path}", data=body, headers=headers, method=method, ) with urllib.request.urlopen(req, timeout=30) as resp: raw = resp.read().decode("utf-8") if not raw: return resp.status, "" try: return resp.status, json.loads(raw) except json.JSONDecodeError: return resp.status, raw def wait_for_server() -> None: deadline = time.time() + 180 while time.time() < deadline: try: request("GET", "/api/projects/") return except Exception: time.sleep(2) raise RuntimeError("Label Studio did not become ready within 180 seconds") def existing_project_titles() -> set[str]: _, payload = request("GET", "/api/projects/") if isinstance(payload, dict): projects = payload.get("results", []) elif isinstance(payload, list): projects = payload else: projects = [] return {str(project.get("title", "")) for project in projects if isinstance(project, dict)} def create_project(title: str, tasks_path: Path, label_config: str) -> None: with tasks_path.open("r", encoding="utf-8") as f: tasks = json.load(f) description = ( "Validate the machine RCA for generated legal judgments. " "Choose the earliest meaningful FIRAC stage, major error category, " "and one exact minor error category." ) _, project = request( "POST", "/api/projects/", { "title": title, "description": description, "label_config": label_config, "sampling": "Sequential sampling", "show_instruction": True, "show_skip_button": False, "enable_empty_annotation": False, }, ) project_id = project["id"] request( "POST", f"/api/projects/{project_id}/import?return_task_ids=true", tasks, ) print(f"Bootstrapped {title} with {len(tasks)} tasks", flush=True) def main() -> None: wait_for_server() if MARKER.exists(): print("RCA Label Studio bootstrap marker exists; skipping project creation.", flush=True) return label_config = (ROOT / "label_config.xml").read_text(encoding="utf-8") titles = existing_project_titles() for idx in range(1, 4): title = f"RCA Validation - Annotator {idx}" if title in titles: print(f"{title} already exists; skipping.", flush=True) continue create_project(title, ROOT / "data" / f"tasks_annotator_{idx}.json", label_config) MARKER.write_text(str(time.time()), encoding="utf-8") if __name__ == "__main__": main()