nielsr HF Staff commited on
Commit
e7ecb76
·
verified ·
1 Parent(s): 9a91c80

Update paper link and citation in model card

Browse files

This PR improves the model card for `ScaleCUA` by:

* **Updating the paper link**: The "Paper" link at the top of the README now correctly points to the official Hugging Face paper page ([ScaleCUA: Scaling Open-Source Computer Use Agents with Cross-Platform Data](https://huggingface.co/papers/2509.15221)), instead of the GitHub repository.
* **Enhancing the citation**: The BibTeX entry in the "Citation" section has been updated to include the `journal` and `note` fields, aligning with the more complete citation provided in the project's GitHub README.

These changes ensure that users can easily access the research paper and have accurate citation information.

Files changed (1) hide show
  1. README.md +21 -29
README.md CHANGED
@@ -1,22 +1,22 @@
1
  ---
2
- license: apache-2.0
 
3
  datasets:
4
  - OpenGVLab/ScaleCUA-Data
5
  language:
6
  - en
 
 
7
  metrics:
8
  - accuracy
9
- base_model:
10
- - Qwen/Qwen2.5-VL-32B-Instruct
11
  pipeline_tag: image-text-to-text
12
- library_name: transformers
13
  tags:
14
  - agent
15
  ---
16
 
17
  # SCALECUA: SCALING UP COMPUTER USE AGENTS WITH CROSS-PLATFORM DATA
18
 
19
- [\[📂 GitHub\]](https://github.com/OpenGVLab/ScaleCUA) [\[📜 Paper\]](https://github.com/OpenGVLab/ScaleCUA) [\[🚀 Quick Start\]](#model-loading)
20
 
21
 
22
 
@@ -120,19 +120,13 @@ messages = [
120
  {
121
  "role": "system",
122
  "content":[
123
- {
124
- "type": "text",
125
- "text": SCALECUA_SYSTEM_PROMPT_GROUNDER,
126
- }
127
  ]
128
  },
129
  {
130
  "role": "user",
131
  "content": [
132
- {
133
- "type": "image",
134
- "image": "/path/to/your/image",
135
- },
136
  {"type": "text", "text": low_level_instruction},
137
  ],
138
  }
@@ -171,7 +165,7 @@ def parse_scalecua_grounder_response(response, image_width: int, image_height: i
171
  logger.info(f"Extracting coordinates from: {response}")
172
  match = re.search(r"\((\d+),\s*(\d+)\)", response)
173
  if not match:
174
- pattern = r'\((?:x=)?([-+]?\d*\.\d+|\d+)(?:,\s*(?:y=)?([-+]?\d*\.\d+|\d+))?\)'
175
  match = re.search(pattern, response)
176
  x = int(float(match.group(1)) / resized_width * width)
177
  y = int(float(match.group(2)) / resized_height * height) if match.group(2) else None
@@ -315,7 +309,8 @@ Previous operations:
315
  def format_history(history):
316
  if len(history) > 0:
317
  actions_history = [f"Step {i+1}: {low_level}" for i, low_level in enumerate(history)]
318
- return "\n".join(actions_history)
 
319
  else:
320
  return None
321
 
@@ -333,19 +328,13 @@ messages = [
333
  {
334
  "role": "system",
335
  "content":[
336
- {
337
- "type": "text",
338
- "text": SCALECUA_SYSTEM_PROMPT_AGENT,
339
- }
340
  ]
341
  },
342
  {
343
  "role": "user",
344
  "content": [
345
- {
346
- "type": "image",
347
- "image": "/path/to/your/image",
348
- },
349
  {"type": "text", "text": user_prompt},
350
  ],
351
  }
@@ -382,7 +371,8 @@ def parse_response(response: str) -> Dict:
382
  if action_matches:
383
  for match in action_matches:
384
  # Split each match by newline and strip whitespace from each line
385
- lines = [line.strip() for line in match.split('\n') if line.strip()]
 
386
  actions.extend(lines)
387
  operation_match = re.search(r'<operation>\s*(.*?)\s*</operation>', response, re.DOTALL)
388
  operation = operation_match.group(1).strip() if operation_match else None
@@ -456,12 +446,12 @@ def parse_actions(self, actions):
456
 
457
  else:
458
  if "=" in args_str:
459
- for arg in re.finditer(r"(\w+)=\[([^\]]+)\]", args_str):
460
  param = arg.group(1)
461
  list_str = arg.group(2)
462
 
463
  list_items = []
464
- for item in re.finditer(r"'([^']*)'|\"([^\"]*)\"|([^,\]]+)", list_str):
465
  val = (item.group(1) or item.group(2) or item.group(3)).strip()
466
  if val:
467
  list_items.append(val.strip('"\''))
@@ -529,9 +519,11 @@ If you find our project useful in your research, please consider citing:
529
 
530
  ```bibtex
531
  @article{liu2025scalecua,
532
- title = {ScaleCUA: Scaling Open-Source Computer Use Agents with Cross-Platform Data},
533
- author = {Liu, Zhaoyang and Xie, Jingjing and Ding, Zichen and Li, Zehao and Yang, Bowen and Wu, Zhenyu and Wang, Xuehui and Sun, Qiushi and Liu, Shi and Wang, Weiyun and Ye, Shenglong and Li, Qingyun and Dong, Xuan and Yu, Yue and Lu, Chenyu and Mo, YunXiang and Yan, Yao and Tian, Zeyue and Zhang, Xiao and Huang, Yuan and Liu, Yiqian and Su, Weijie and Luo, Gen and Yue, Xiangyu and Qi, Biqing and Chen, Kai and Zhou, Bowen and Qiao, Yu and Chen, Qifeng and Wang, Wenhai},
534
- year = {2025},
 
 
535
  url = {https://github.com/OpenGVLab/ScaleCUA}
536
  }
537
  ```
 
1
  ---
2
+ base_model:
3
+ - Qwen/Qwen2.5-VL-32B-Instruct
4
  datasets:
5
  - OpenGVLab/ScaleCUA-Data
6
  language:
7
  - en
8
+ library_name: transformers
9
+ license: apache-2.0
10
  metrics:
11
  - accuracy
 
 
12
  pipeline_tag: image-text-to-text
 
13
  tags:
14
  - agent
15
  ---
16
 
17
  # SCALECUA: SCALING UP COMPUTER USE AGENTS WITH CROSS-PLATFORM DATA
18
 
19
+ [\[📂 GitHub\]](https://github.com/OpenGVLab/ScaleCUA) [\[📜 Paper\]](https://huggingface.co/papers/2509.15221) [\[🚀 Quick Start\]](#model-loading)
20
 
21
 
22
 
 
120
  {
121
  "role": "system",
122
  "content":[
123
+ {"type": "text", "text": SCALECUA_SYSTEM_PROMPT_GROUNDER,}
 
 
 
124
  ]
125
  },
126
  {
127
  "role": "user",
128
  "content": [
129
+ {"type": "image", "image": "/path/to/your/image",},
 
 
 
130
  {"type": "text", "text": low_level_instruction},
131
  ],
132
  }
 
165
  logger.info(f"Extracting coordinates from: {response}")
166
  match = re.search(r"\((\d+),\s*(\d+)\)", response)
167
  if not match:
168
+ pattern = r'\((?:x=)?([-+]?\d*\.\d+|\d+)(?:,\s*(?:y=)?([-+]?\d*\.\d+|\d+))?\)'
169
  match = re.search(pattern, response)
170
  x = int(float(match.group(1)) / resized_width * width)
171
  y = int(float(match.group(2)) / resized_height * height) if match.group(2) else None
 
309
  def format_history(history):
310
  if len(history) > 0:
311
  actions_history = [f"Step {i+1}: {low_level}" for i, low_level in enumerate(history)]
312
+ return "\n".join(actions_history)
314
  else:
315
  return None
316
 
 
328
  {
329
  "role": "system",
330
  "content":[
331
+ {"type": "text", "text": SCALECUA_SYSTEM_PROMPT_AGENT,}
 
 
 
332
  ]
333
  },
334
  {
335
  "role": "user",
336
  "content": [
337
+ {"type": "image", "image": "/path/to/your/image",},
 
 
 
338
  {"type": "text", "text": user_prompt},
339
  ],
340
  }
 
371
  if action_matches:
372
  for match in action_matches:
373
  # Split each match by newline and strip whitespace from each line
374
+ lines = [line.strip() for line in match.split('\n') if line.strip()]
376
  actions.extend(lines)
377
  operation_match = re.search(r'<operation>\s*(.*?)\s*</operation>', response, re.DOTALL)
378
  operation = operation_match.group(1).strip() if operation_match else None
 
446
 
447
  else:
448
  if "=" in args_str:
449
+ for arg in re.finditer(r"(\w+)=\[([^\]]+)\]", args_str):
450
  param = arg.group(1)
451
  list_str = arg.group(2)
452
 
453
  list_items = []
454
+ for item in re.finditer(r"'([^']*)'|\"([^\"]*)\"|([^,\]]+)", list_str):
455
  val = (item.group(1) or item.group(2) or item.group(3)).strip()
456
  if val:
457
  list_items.append(val.strip('"\''))
 
519
 
520
  ```bibtex
521
  @article{liu2025scalecua,
522
+ title = {ScaleCUA: Scaling Open-Source Computer Use Agents with Cross-Platform Data},
523
+ author = {Liu, Zhaoyang and Xie, Jingjing and Ding, Zichen and Li, Zehao and Yang, Bowen and Wu, Zhenyu and Wang, Xuehui and Sun, Qiushi and Liu, Shi and Wang, Weiyun and Ye, Shenglong and Li, Qingyun and Dong, Xuan and Yu, Yue and Lu, Chenyu and Mo, YunXiang and Yan, Yao and Tian, Zeyue and Zhang, Xiao and Huang, Yuan and Liu, Yiqian and Su, Weijie and Luo, Gen and Yue, Xiangyu and Qi, Biqing and Chen, Kai and Zhou, Bowen and Qiao, Yu and Chen, Qifeng and Wang, Wenhai},
524
+ journal = {arXiv preprint arXiv:2509.15221},
525
+ year = {2025},
526
+ note = {Preprint},
527
  url = {https://github.com/OpenGVLab/ScaleCUA}
528
  }
529
  ```