davanstrien HF Staff committed on
Commit
3dee2c6
·
verified ·
1 Parent(s): 52c4067

Patch _get_tasks to use PAT/refresh-token flow (legacy token auth disabled by default in modern LS)

Browse files
Files changed (1) hide show
  1. model.py +25 -5
model.py CHANGED
@@ -1,8 +1,8 @@
1
  import os
2
  import pathlib
3
  import re
4
- import label_studio_sdk
5
  import logging
 
6
 
7
  from typing import List, Dict, Optional
8
  from label_studio_ml.model import LabelStudioMLBase
@@ -106,10 +106,30 @@ class HuggingFaceNER(LabelStudioMLBase):
106
  return ModelResponse(predictions=predictions, model_version=self.get('model_version'))
107
 
108
  def _get_tasks(self, project_id):
109
- # download annotated tasks from Label Studio
110
- ls = label_studio_sdk.Client(self.LABEL_STUDIO_HOST, self.LABEL_STUDIO_API_KEY)
111
- project = ls.get_project(id=project_id)
112
- tasks = project.get_labeled_tasks()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  return tasks
114
 
115
  def tokenize_and_align_labels(self, examples, tokenizer):
 
1
  import os
2
  import pathlib
3
  import re
 
4
  import logging
5
+ import requests
6
 
7
  from typing import List, Dict, Optional
8
  from label_studio_ml.model import LabelStudioMLBase
 
106
  return ModelResponse(predictions=predictions, model_version=self.get('model_version'))
107
 
108
  def _get_tasks(self, project_id):
109
+ # Download annotated tasks from Label Studio.
110
+ # (HF Spaces patch) Modern LS instances disable legacy-token auth, which
111
+ # breaks the upstream `label_studio_sdk.Client(host, api_key)` path.
112
+ # LABEL_STUDIO_API_KEY is treated as a Personal Access Token (refresh
113
+ # token); we exchange it for a short-lived access token, then call the
114
+ # LS REST API directly. Filters to tasks with at least one annotation
115
+ # (legacy `project.get_labeled_tasks()` semantics).
116
+ refresh_resp = requests.post(
117
+ f"{self.LABEL_STUDIO_HOST}/api/token/refresh/",
118
+ json={"refresh": self.LABEL_STUDIO_API_KEY},
119
+ timeout=30,
120
+ )
121
+ refresh_resp.raise_for_status()
122
+ access = refresh_resp.json()["access"]
123
+ headers = {"Authorization": f"Bearer {access}"}
124
+ tasks: List[Dict] = []
125
+ url = f"{self.LABEL_STUDIO_HOST}/api/tasks/?project={project_id}&page_size=200&fields=all"
126
+ while url:
127
+ r = requests.get(url, headers=headers, timeout=60)
128
+ r.raise_for_status()
129
+ payload = r.json()
130
+ page = payload if isinstance(payload, list) else payload.get("tasks", [])
131
+ tasks.extend(t for t in page if t.get("annotations"))
132
+ url = payload.get("next") if isinstance(payload, dict) else None
133
  return tasks
134
 
135
  def tokenize_and_align_labels(self, examples, tokenizer):