Spaces:
Running
on
Zero
Running
on
Zero
Joseph Pollack
committed on
adds step instructions
Browse files
app.py
CHANGED
|
@@ -157,7 +157,7 @@ class LOperatorDemo:
|
|
| 157 |
# Initialize demo
|
| 158 |
demo_instance = LOperatorDemo()
|
| 159 |
|
| 160 |
-
def process_input(image, goal):
|
| 161 |
"""Process the input and generate action"""
|
| 162 |
if image is None:
|
| 163 |
return "β Please upload an Android screenshot image."
|
|
@@ -165,6 +165,9 @@ def process_input(image, goal):
|
|
| 165 |
if not goal.strip():
|
| 166 |
return "β Please provide a goal."
|
| 167 |
|
|
|
|
|
|
|
|
|
|
| 168 |
if not demo_instance.is_loaded:
|
| 169 |
return "β Model not loaded. Please wait for it to load automatically."
|
| 170 |
|
|
@@ -189,8 +192,8 @@ def process_input(image, goal):
|
|
| 189 |
if pil_image.mode != "RGB":
|
| 190 |
pil_image = pil_image.convert("RGB")
|
| 191 |
|
| 192 |
-
# Generate action using goal
|
| 193 |
-
response = demo_instance.generate_action(pil_image, goal,
|
| 194 |
return response
|
| 195 |
|
| 196 |
except Exception as e:
|
|
@@ -237,11 +240,19 @@ def load_example_episodes():
|
|
| 237 |
episode_num = episode_dir.split('_')[1]
|
| 238 |
goal_text = metadata.get('goal', f'Episode {episode_num} example')
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
logger.info(f"Episode {episode_num} goal: {goal_text}")
|
|
|
|
| 241 |
|
| 242 |
examples.append([
|
| 243 |
pil_image, # Use PIL Image object directly
|
| 244 |
-
goal_text # Use the goal text from metadata
|
|
|
|
| 245 |
])
|
| 246 |
logger.info(f"Successfully loaded example for Episode {episode_num}")
|
| 247 |
|
|
@@ -320,6 +331,13 @@ def create_demo():
|
|
| 320 |
lines=3
|
| 321 |
)
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
# Process button
|
| 324 |
process_btn = gr.Button("π Generate Action", variant="primary", size="lg")
|
| 325 |
|
|
@@ -336,7 +354,7 @@ def create_demo():
|
|
| 336 |
# Connect the process button
|
| 337 |
process_btn.click(
|
| 338 |
fn=process_input,
|
| 339 |
-
inputs=[image_input, goal_input],
|
| 340 |
outputs=output_text
|
| 341 |
)
|
| 342 |
|
|
@@ -349,7 +367,7 @@ def create_demo():
|
|
| 349 |
for row_start in range(0, len(examples), 3):
|
| 350 |
with gr.Row():
|
| 351 |
for i in range(row_start, min(row_start + 3, len(examples))):
|
| 352 |
-
image, goal = examples[i]
|
| 353 |
with gr.Column(scale=1):
|
| 354 |
episode_num = i + 1
|
| 355 |
gr.Markdown(f"**Episode {episode_num}**")
|
|
@@ -365,12 +383,18 @@ def create_demo():
|
|
| 365 |
lines=3,
|
| 366 |
interactive=False
|
| 367 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
# Create a button to load this example
|
| 369 |
load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
|
| 370 |
load_example_btn.click(
|
| 371 |
-
fn=lambda img, g: (img, g),
|
| 372 |
-
inputs=[example_image, example_goal],
|
| 373 |
-
outputs=[image_input, goal_input]
|
| 374 |
)
|
| 375 |
except Exception as e:
|
| 376 |
logger.warning(f"Failed to load examples: {str(e)}")
|
|
|
|
| 157 |
# Initialize demo
|
| 158 |
demo_instance = LOperatorDemo()
|
| 159 |
|
| 160 |
+
def process_input(image, goal, step_instructions):
|
| 161 |
"""Process the input and generate action"""
|
| 162 |
if image is None:
|
| 163 |
return "β Please upload an Android screenshot image."
|
|
|
|
| 165 |
if not goal.strip():
|
| 166 |
return "β Please provide a goal."
|
| 167 |
|
| 168 |
+
if not step_instructions.strip():
|
| 169 |
+
return "β Please provide step instructions."
|
| 170 |
+
|
| 171 |
if not demo_instance.is_loaded:
|
| 172 |
return "β Model not loaded. Please wait for it to load automatically."
|
| 173 |
|
|
|
|
| 192 |
if pil_image.mode != "RGB":
|
| 193 |
pil_image = pil_image.convert("RGB")
|
| 194 |
|
| 195 |
+
# Generate action using goal and step instructions
|
| 196 |
+
response = demo_instance.generate_action(pil_image, goal, step_instructions)
|
| 197 |
return response
|
| 198 |
|
| 199 |
except Exception as e:
|
|
|
|
| 240 |
episode_num = episode_dir.split('_')[1]
|
| 241 |
goal_text = metadata.get('goal', f'Episode {episode_num} example')
|
| 242 |
|
| 243 |
+
# Get step instruction for the corresponding screenshot
|
| 244 |
+
step_instructions = metadata.get('step_instructions', [])
|
| 245 |
+
step_instruction = ""
|
| 246 |
+
if step_instructions and screenshot_num <= len(step_instructions):
|
| 247 |
+
step_instruction = step_instructions[screenshot_num - 1]
|
| 248 |
+
|
| 249 |
logger.info(f"Episode {episode_num} goal: {goal_text}")
|
| 250 |
+
logger.info(f"Episode {episode_num} step instruction: {step_instruction}")
|
| 251 |
|
| 252 |
examples.append([
|
| 253 |
pil_image, # Use PIL Image object directly
|
| 254 |
+
goal_text, # Use the goal text from metadata
|
| 255 |
+
step_instruction # Use the step instruction for this screenshot
|
| 256 |
])
|
| 257 |
logger.info(f"Successfully loaded example for Episode {episode_num}")
|
| 258 |
|
|
|
|
| 331 |
lines=3
|
| 332 |
)
|
| 333 |
|
| 334 |
+
gr.Markdown("### π Step Instructions")
|
| 335 |
+
step_instructions_input = gr.Textbox(
|
| 336 |
+
label="Specific step instruction for this screenshot",
|
| 337 |
+
placeholder="e.g., Tap on the Settings icon to open the app",
|
| 338 |
+
lines=2
|
| 339 |
+
)
|
| 340 |
+
|
| 341 |
# Process button
|
| 342 |
process_btn = gr.Button("π Generate Action", variant="primary", size="lg")
|
| 343 |
|
|
|
|
| 354 |
# Connect the process button
|
| 355 |
process_btn.click(
|
| 356 |
fn=process_input,
|
| 357 |
+
inputs=[image_input, goal_input, step_instructions_input],
|
| 358 |
outputs=output_text
|
| 359 |
)
|
| 360 |
|
|
|
|
| 367 |
for row_start in range(0, len(examples), 3):
|
| 368 |
with gr.Row():
|
| 369 |
for i in range(row_start, min(row_start + 3, len(examples))):
|
| 370 |
+
image, goal, step_instruction = examples[i]
|
| 371 |
with gr.Column(scale=1):
|
| 372 |
episode_num = i + 1
|
| 373 |
gr.Markdown(f"**Episode {episode_num}**")
|
|
|
|
| 383 |
lines=3,
|
| 384 |
interactive=False
|
| 385 |
)
|
| 386 |
+
example_step_instruction = gr.Textbox(
|
| 387 |
+
value=step_instruction,
|
| 388 |
+
label="Step Instruction",
|
| 389 |
+
lines=2,
|
| 390 |
+
interactive=False
|
| 391 |
+
)
|
| 392 |
# Create a button to load this example
|
| 393 |
load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
|
| 394 |
load_example_btn.click(
|
| 395 |
+
fn=lambda img, g, s: (img, g, s),
|
| 396 |
+
inputs=[example_image, example_goal, example_step_instruction],
|
| 397 |
+
outputs=[image_input, goal_input, step_instructions_input]
|
| 398 |
)
|
| 399 |
except Exception as e:
|
| 400 |
logger.warning(f"Failed to load examples: {str(e)}")
|