'
h += (f'')
tok_idx = 0
if out_info:
stats = _parse_out_info_dict(out_info)
bits = []
if "forward_step" in stats:
bits.append(f"{stats['forward_step']} steps")
if "num_tokens" in stats:
bits.append(f"{stats['num_tokens']} tokens")
if "num_boxes" in stats:
bits.append(f"{stats['num_boxes']} boxes")
if "switch_to_ar" in stats:
n = stats["switch_to_ar"]
bits.append(f"{n} AR fallback{'s' if n != '1' else ''}")
if "tps" in stats:
bits.append(f"{stats['tps']} tok/s")
if "bps" in stats:
bits.append(f"{stats['bps']} box/s")
summary = " · ".join(bits) if bits else out_info.strip()
h += (f'
'
f'{summary}
')
h += f'
'
if token_sequence:
for item in token_sequence:
if not isinstance(item, (list, tuple)) or len(item) < 2:
continue
decode_type = str(item[0]).lower()
text = str(item[1])
safe = text.replace("<", "<").replace(">", ">")
delay = f"{tok_idx * 0.06:.2f}s"
cls = f"tk-ar-{uid}" if decode_type == "ar" else f"tk-mtp-{uid}"
h += f'{safe} '
tok_idx += 1
h += '
'
if raw_text:
safe_raw = raw_text.replace("<", "<").replace(">", ">")
h += (f'
Raw Response
'
f'{safe_raw}
')
h += '
'
return h
def generate_raw_prompt(task_type, category):
    """Build the text prompt for a task type and a category description.

    Args:
        task_type: Task name. "Detection", "Grounding", "OCR", "GUI" and
            "Pointing" have dedicated wording; any other value receives
            the same wording as "Detection".
        category: Comma-separated description string. A falsy value is
            replaced by "objects".

    Returns:
        The prompt string. The "OCR" prompt does not include the category.
    """
    description = category or "objects"

    # Split on commas, trim each piece, drop the empty ones.
    # NOTE(review): the pieces are joined with an empty separator, so
    # "cat, dog" becomes "catdog" -- confirm this is the intended format.
    trimmed = [piece.strip() for piece in description.split(",")]
    cats = "".join(piece for piece in trimmed if piece)

    if task_type == "OCR":
        return "Detect all the text in box format."
    if task_type == "Grounding":
        return f"Locate all the instances that match the following description: {cats}."
    if task_type == "GUI":
        return f"Locate the region that matches the following description: {cats}."
    if task_type == "Pointing":
        return f"Point to: {cats}."

    # "Detection" and every unrecognised task type share this wording.
    return f"Locate all the instances that matches the following description: {cats}."
# ============================================================
# Model initialization
# ============================================================
# Module-level cache for the model worker; stays None until a load succeeds.
GLOBAL_WORKER = None


def get_worker():
    """Return the cached model worker, creating it on first use.

    The model path is read from the MODEL_PATH environment variable and
    defaults to "nvidia/LocateAnything-3B". If construction raises, the
    failure is printed and None is returned. Because nothing is cached in
    that case, the next call attempts the load again.

    Returns:
        The cached EagleWorker instance, or None when loading failed.
    """
    global GLOBAL_WORKER

    # Fast path: a worker was already built by an earlier call.
    if GLOBAL_WORKER is not None:
        return GLOBAL_WORKER

    try:
        model_path = os.environ.get("MODEL_PATH", "nvidia/LocateAnything-3B")
        print(f"Loading model inside @spaces.GPU context: {model_path}")
        GLOBAL_WORKER = EagleWorker(model_path)
    except Exception as exc:
        # Deliberate best-effort: report the failure and leave the cache empty.
        print(f"Failed to load model: {exc}. Will run in Mock Mode.")
        GLOBAL_WORKER = None

    return GLOBAL_WORKER
def _prepare_image_for_model(pil_img, short_size):
process_img = pil_img.copy()
if short_size is not None and short_size > 0:
process_img, _ = resize_image_short_side(process_img, min(int(short_size), 1024))
else:
if min(process_img.size) > 1024:
process_img, _ = resize_image_short_side(process_img, 1024)
return process_img
# ============================================================
# 用户数据收集(HuggingFace Public Dataset)
# 策略:one-record-per-file,配合按日目录 + 容器级 SESSION_ID
# 每条记录:data/