Patch _get_tasks to use PAT/refresh-token flow (legacy token auth disabled by default in modern LS)
Browse files
model.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import pathlib
|
| 3 |
import re
|
| 4 |
-
import label_studio_sdk
|
| 5 |
import logging
|
|
|
|
| 6 |
|
| 7 |
from typing import List, Dict, Optional
|
| 8 |
from label_studio_ml.model import LabelStudioMLBase
|
|
@@ -106,10 +106,30 @@ class HuggingFaceNER(LabelStudioMLBase):
|
|
| 106 |
return ModelResponse(predictions=predictions, model_version=self.get('model_version'))
|
| 107 |
|
| 108 |
def _get_tasks(self, project_id):
|
| 109 |
-
#
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
return tasks
|
| 114 |
|
| 115 |
def tokenize_and_align_labels(self, examples, tokenizer):
|
|
|
|
| 1 |
import os
|
| 2 |
import pathlib
|
| 3 |
import re
|
|
|
|
| 4 |
import logging
|
| 5 |
+
import requests
|
| 6 |
|
| 7 |
from typing import List, Dict, Optional
|
| 8 |
from label_studio_ml.model import LabelStudioMLBase
|
|
|
|
| 106 |
return ModelResponse(predictions=predictions, model_version=self.get('model_version'))
|
| 107 |
|
| 108 |
def _get_tasks(self, project_id):
    """Download every annotated task of a Label Studio project.

    (HF Spaces patch) Modern Label Studio instances disable legacy-token
    auth, which breaks the upstream `label_studio_sdk.Client(host, api_key)`
    path. `LABEL_STUDIO_API_KEY` is therefore treated as a Personal Access
    Token (refresh token): it is exchanged for a short-lived access token,
    and the REST API is then called directly.

    Args:
        project_id: ID of the Label Studio project whose tasks are fetched.

    Returns:
        A list of task dicts, restricted to tasks that carry at least one
        annotation (legacy `project.get_labeled_tasks()` semantics).

    Raises:
        requests.HTTPError: if the token exchange or a task request fails.
        KeyError: if the token response has no "access" field.
    """
    # Tolerate a trailing slash in the configured host so URLs never
    # contain a double slash.
    host = str(self.LABEL_STUDIO_HOST).rstrip("/")

    refresh_resp = requests.post(
        f"{host}/api/token/refresh/",
        json={"refresh": self.LABEL_STUDIO_API_KEY},
        timeout=30,
    )
    refresh_resp.raise_for_status()
    access = refresh_resp.json()["access"]
    headers = {"Authorization": f"Bearer {access}"}

    # The tasks endpoint is page-numbered (`page` / `page_size`) and its
    # payload carries `tasks` / `total` rather than a `next` link, so the
    # pages are requested explicitly; following a `next` URL would stop
    # after the first page and silently drop the remaining tasks.
    page_size = 200
    tasks: List[Dict] = []
    fetched = 0
    page = 1
    while True:
        r = requests.get(
            f"{host}/api/tasks/",
            params={
                "project": project_id,
                "page": page,
                "page_size": page_size,
                "fields": "all",
            },
            headers=headers,
            timeout=60,
        )
        # Asking for a page past the last one yields 404; after the first
        # page that simply means "no more data", not a failure.
        if r.status_code == 404 and page > 1:
            break
        r.raise_for_status()
        payload = r.json()

        if isinstance(payload, list):
            # Non-paginated response: the list is the complete result.
            tasks.extend(t for t in payload if t.get("annotations"))
            break

        batch = payload.get("tasks", [])
        tasks.extend(t for t in batch if t.get("annotations"))
        fetched += len(batch)

        total = payload.get("total")
        reached_total = isinstance(total, int) and fetched >= total
        # A short (or empty) page is the last one.
        if len(batch) < page_size or reached_total:
            break
        page += 1
    return tasks
|
| 134 |
|
| 135 |
def tokenize_and_align_labels(self, examples, tokenizer):
|