Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -124,8 +124,8 @@ except Exception as e:
|
|
| 124 |
model_x = None
|
| 125 |
processor_x = None
|
| 126 |
|
| 127 |
-
print("🔄 Loading Holo1-7B...")
|
| 128 |
-
MODEL_ID_H = "Hcompany/Holo1-7B"
|
| 129 |
try:
|
| 130 |
processor_h = AutoProcessor.from_pretrained(MODEL_ID_H, trust_remote_code=True)
|
| 131 |
model_h = AutoModelForImageTextToText.from_pretrained(
|
|
@@ -359,10 +359,10 @@ def process_screenshot(input_numpy_image: np.ndarray, task: str, model_choice: s
|
|
| 359 |
model, processor = model_x, processor_x
|
| 360 |
if model is None: return "Error: UI-TARS model failed to load.", None
|
| 361 |
print("Using UI-TARS Pipeline...")
|
| 362 |
-
elif model_choice == "Holo1-7B":
|
| 363 |
model, processor = model_h, processor_h
|
| 364 |
if model is None: return "Error: Holo2-8B model failed to load.", None
|
| 365 |
-
print("Using Holo1-7B Pipeline...")
|
| 366 |
else:
|
| 367 |
return f"Error: Unknown model '{model_choice}'", None
|
| 368 |
|
|
@@ -416,7 +416,7 @@ css="""
|
|
| 416 |
"""
|
| 417 |
with gr.Blocks() as demo:
|
| 418 |
gr.Markdown("# **CUA GUI Operator 🖥️**", elem_id="main-title")
|
| 419 |
-
gr.Markdown("Perform Computer Use Agent tasks with the models: [Fara-7B](https://huggingface.co/microsoft/Fara-7B), [UI-TARS-1.5-7B](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B), and [Holo](https://huggingface.co/Hcompany/Holo1-7B).")
|
| 420 |
|
| 421 |
with gr.Row():
|
| 422 |
with gr.Column(scale=2):
|
|
@@ -424,7 +424,7 @@ with gr.Blocks() as demo:
|
|
| 424 |
|
| 425 |
with gr.Row():
|
| 426 |
model_choice = gr.Radio(
|
| 427 |
-
choices=["Fara-7B", "UI-TARS-1.5-7B", "Holo1-7B"],
|
| 428 |
label="Select Model",
|
| 429 |
value="Fara-7B",
|
| 430 |
interactive=True
|
|
@@ -451,7 +451,7 @@ with gr.Blocks() as demo:
|
|
| 451 |
examples=[
|
| 452 |
["examples/1.png", "Click on the Fara-7B model.", "Fara-7B"],
|
| 453 |
["examples/2.png", "Click on the VLMs Collection", "UI-TARS-1.5-7B"],
|
| 454 |
-
["examples/3.png", "Click on the 'Real-time vision models' collection.", "Holo1-7B"],
|
| 455 |
],
|
| 456 |
inputs=[input_image, task_input, model_choice],
|
| 457 |
label="Quick Examples"
|
|
|
|
| 124 |
model_x = None
|
| 125 |
processor_x = None
|
| 126 |
|
| 127 |
+
print("🔄 Loading Holo1.5-7B...")
|
| 128 |
+
MODEL_ID_H = "Hcompany/Holo1.5-7B"
|
| 129 |
try:
|
| 130 |
processor_h = AutoProcessor.from_pretrained(MODEL_ID_H, trust_remote_code=True)
|
| 131 |
model_h = AutoModelForImageTextToText.from_pretrained(
|
|
|
|
| 359 |
model, processor = model_x, processor_x
|
| 360 |
if model is None: return "Error: UI-TARS model failed to load.", None
|
| 361 |
print("Using UI-TARS Pipeline...")
|
| 362 |
+
elif model_choice == "Holo1.5-7B":
|
| 363 |
model, processor = model_h, processor_h
|
| 364 |
if model is None: return "Error: Holo2-8B model failed to load.", None
|
| 365 |
+
print("Using Holo1.5-7B Pipeline...")
|
| 366 |
else:
|
| 367 |
return f"Error: Unknown model '{model_choice}'", None
|
| 368 |
|
|
|
|
| 416 |
"""
|
| 417 |
with gr.Blocks() as demo:
|
| 418 |
gr.Markdown("# **CUA GUI Operator 🖥️**", elem_id="main-title")
|
| 419 |
+
gr.Markdown("Perform Computer Use Agent tasks with the models: [Fara-7B](https://huggingface.co/microsoft/Fara-7B), [UI-TARS-1.5-7B](https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B), and [Holo](https://huggingface.co/Hcompany/Holo1.5-7B).")
|
| 420 |
|
| 421 |
with gr.Row():
|
| 422 |
with gr.Column(scale=2):
|
|
|
|
| 424 |
|
| 425 |
with gr.Row():
|
| 426 |
model_choice = gr.Radio(
|
| 427 |
+
choices=["Fara-7B", "UI-TARS-1.5-7B", "Holo1.5-7B"],
|
| 428 |
label="Select Model",
|
| 429 |
value="Fara-7B",
|
| 430 |
interactive=True
|
|
|
|
| 451 |
examples=[
|
| 452 |
["examples/1.png", "Click on the Fara-7B model.", "Fara-7B"],
|
| 453 |
["examples/2.png", "Click on the VLMs Collection", "UI-TARS-1.5-7B"],
|
| 454 |
+
["examples/3.png", "Click on the 'Real-time vision models' collection.", "Holo1.5-7B"],
|
| 455 |
],
|
| 456 |
inputs=[input_image, task_input, model_choice],
|
| 457 |
label="Quick Examples"
|