Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import time | |
| import urllib.error | |
| import urllib.request | |
| from pathlib import Path | |
# Base address of the Label Studio instance; request() prepends it to every API path.
BASE_URL = "http://127.0.0.1:7860"

# Directory that holds label_config.xml and the data/tasks_annotator_<n>.json files.
ROOT = Path("/opt/rca_label_studio")

# Marker file written once bootstrapping has finished. It lives in the directory
# named by LABEL_STUDIO_BASE_DATA_DIR (default "/data"); main() skips project
# creation while the file exists.
MARKER = Path(os.environ.get("LABEL_STUDIO_BASE_DATA_DIR", "/data")).joinpath(
    ".rca_projects_bootstrapped"
)
def request(method: str, path: str, payload: dict | list | None = None) -> tuple[int, dict | list | str]:
    """Send one token-authenticated HTTP request to the API under BASE_URL.

    The token is read from the LABEL_STUDIO_USER_TOKEN environment variable,
    falling back to "rca-admin-token". When ``payload`` is given it is sent
    as a UTF-8 encoded JSON body with a matching Content-Type header.

    Returns:
        ``(status, body)`` where ``body`` is the decoded JSON document, the
        raw response text if it is not valid JSON, or ``""`` if the response
        was empty.

    Errors raised by ``urllib.request.urlopen`` are not caught here.
    """
    auth_token = os.getenv("LABEL_STUDIO_USER_TOKEN", "rca-admin-token")
    http_headers = {"Authorization": f"Token {auth_token}"}

    encoded_body = None
    if payload is not None:
        encoded_body = json.dumps(payload).encode("utf-8")
        http_headers["Content-Type"] = "application/json"

    outgoing = urllib.request.Request(
        url=f"{BASE_URL}{path}",
        data=encoded_body,
        headers=http_headers,
        method=method,
    )

    # Each call is bounded by a 30 second timeout.
    with urllib.request.urlopen(outgoing, timeout=30) as response:
        text = response.read().decode("utf-8")
        status_code = response.status

    if text == "":
        return status_code, ""

    try:
        decoded = json.loads(text)
    except json.JSONDecodeError:
        # Not JSON: hand the text back untouched.
        return status_code, text
    return status_code, decoded
def wait_for_server(timeout: float = 180, interval: float = 2) -> None:
    """Block until the API answers ``GET /api/projects/`` or the timeout expires.

    Args:
        timeout: Maximum number of seconds to keep polling (default 180).
        interval: Seconds to sleep after each failed attempt (default 2).

    Raises:
        RuntimeError: If no attempt succeeded before the deadline. The last
            failure seen while polling is attached as ``__cause__``.
    """
    # monotonic() cannot jump when the system clock is adjusted, so the
    # deadline stays a real elapsed-time bound.
    deadline = time.monotonic() + timeout
    last_error: Exception | None = None

    while time.monotonic() < deadline:
        try:
            request("GET", "/api/projects/")
        except Exception as err:
            # Any failure is treated as "not ready yet". It is remembered so
            # the final error can report why the server never became usable.
            last_error = err
            time.sleep(interval)
        else:
            return

    raise RuntimeError(
        f"Label Studio did not become ready within {timeout:g} seconds"
    ) from last_error
def existing_project_titles() -> set[str]:
    """Return the titles of all projects reported by ``GET /api/projects/``.

    Two response shapes are accepted: a paginated object with a ``results``
    list, or a bare list of projects. For the paginated form, further pages
    are requested while the response has a non-empty ``next`` field, so
    titles beyond the first page are included. Entries that are not dicts
    are ignored; a project without a ``title`` contributes ``""``.
    """
    titles: set[str] = set()
    page = 1
    while True:
        # The first request uses the plain path; later ones add the page number.
        path = "/api/projects/" if page == 1 else f"/api/projects/?page={page}"
        _, payload = request("GET", path)

        if isinstance(payload, dict):
            projects = payload.get("results", [])
            has_next = bool(payload.get("next"))
        elif isinstance(payload, list):
            projects = payload
            has_next = False
        else:
            projects = []
            has_next = False

        if not isinstance(projects, list):
            projects = []

        titles.update(
            str(project.get("title", ""))
            for project in projects
            if isinstance(project, dict)
        )

        # Stop on the last page. An empty page also ends the loop, so an
        # unexpected response cannot keep it running.
        if not has_next or not projects:
            return titles
        page += 1
def create_project(title: str, tasks_path: Path, label_config: str) -> None:
    """Create one project and import its tasks from a JSON file.

    Args:
        title: Title of the project to create.
        tasks_path: UTF-8 JSON file whose content is posted to the project's
            import endpoint.
        label_config: Labeling configuration sent as ``label_config``.

    Raises:
        RuntimeError: If the create call does not return a JSON object
            containing an ``id``.
        Exception: Anything raised while reading the tasks file or by
            ``request``. If the import fails, the project is deleted before
            the error is re-raised.
    """
    # Read the tasks first so a missing or malformed file fails before any
    # project has been created on the server.
    with tasks_path.open("r", encoding="utf-8") as f:
        tasks = json.load(f)

    description = (
        "Validate the machine RCA for generated legal judgments. "
        "Choose the earliest meaningful FIRAC stage, major error category, "
        "and one exact minor error category."
    )
    _, project = request(
        "POST",
        "/api/projects/",
        {
            "title": title,
            "description": description,
            "label_config": label_config,
            "sampling": "Sequential sampling",
            "show_instruction": True,
            "show_skip_button": False,
            "enable_empty_annotation": False,
        },
    )
    if not isinstance(project, dict) or "id" not in project:
        raise RuntimeError(
            f"Unexpected response when creating project {title!r}: {project!r}"
        )
    project_id = project["id"]

    try:
        request(
            "POST",
            f"/api/projects/{project_id}/import?return_task_ids=true",
            tasks,
        )
    except Exception:
        # main() skips titles that already exist, so a project left without
        # tasks would never be filled on a later run. Remove it so the next
        # run can start clean.
        try:
            request("DELETE", f"/api/projects/{project_id}/")
        except Exception as cleanup_error:
            print(
                f"Could not remove partially created project {title} "
                f"(id {project_id}): {cleanup_error}",
                flush=True,
            )
        raise

    print(f"Bootstrapped {title} with {len(tasks)} tasks", flush=True)
def main() -> None:
    """Create the three annotator projects once, then write the marker file.

    Steps:
      1. Wait for the server to answer.
      2. Return early if MARKER already exists.
      3. Read ``label_config.xml`` from ROOT and, for annotators 1 to 3,
         create "RCA Validation - Annotator <n>" from
         ``data/tasks_annotator_<n>.json`` unless a project with that title
         already exists.
      4. Write the current timestamp to MARKER.
    """
    wait_for_server()
    if MARKER.exists():
        print("RCA Label Studio bootstrap marker exists; skipping project creation.", flush=True)
        return

    label_config = (ROOT / "label_config.xml").read_text(encoding="utf-8")
    titles = existing_project_titles()
    for idx in range(1, 4):
        title = f"RCA Validation - Annotator {idx}"
        if title in titles:
            print(f"{title} already exists; skipping.", flush=True)
            continue
        create_project(title, ROOT / "data" / f"tasks_annotator_{idx}.json", label_config)

    # Without the parent directory the marker write would fail after all
    # projects were created, and every rerun would fail at the same point.
    MARKER.parent.mkdir(parents=True, exist_ok=True)
    MARKER.write_text(str(time.time()), encoding="utf-8")


if __name__ == "__main__":
    main()