Joseph Pollack committed on
Commit
a0c936d
·
unverified ·
1 Parent(s): 1c19049

adds step instructions

Browse files
Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -157,7 +157,7 @@ class LOperatorDemo:
157
  # Initialize demo
158
  demo_instance = LOperatorDemo()
159
 
160
- def process_input(image, goal):
161
  """Process the input and generate action"""
162
  if image is None:
163
  return "❌ Please upload an Android screenshot image."
@@ -165,6 +165,9 @@ def process_input(image, goal):
165
  if not goal.strip():
166
  return "❌ Please provide a goal."
167
 
 
 
 
168
  if not demo_instance.is_loaded:
169
  return "❌ Model not loaded. Please wait for it to load automatically."
170
 
@@ -189,8 +192,8 @@ def process_input(image, goal):
189
  if pil_image.mode != "RGB":
190
  pil_image = pil_image.convert("RGB")
191
 
192
- # Generate action using goal as both goal and instruction
193
- response = demo_instance.generate_action(pil_image, goal, goal)
194
  return response
195
 
196
  except Exception as e:
@@ -237,11 +240,19 @@ def load_example_episodes():
237
  episode_num = episode_dir.split('_')[1]
238
  goal_text = metadata.get('goal', f'Episode {episode_num} example')
239
 
 
 
 
 
 
 
240
  logger.info(f"Episode {episode_num} goal: {goal_text}")
 
241
 
242
  examples.append([
243
  pil_image, # Use PIL Image object directly
244
- goal_text # Use the goal text from metadata
 
245
  ])
246
  logger.info(f"Successfully loaded example for Episode {episode_num}")
247
 
@@ -320,6 +331,13 @@ def create_demo():
320
  lines=3
321
  )
322
 
 
 
 
 
 
 
 
323
  # Process button
324
  process_btn = gr.Button("🚀 Generate Action", variant="primary", size="lg")
325
 
@@ -336,7 +354,7 @@ def create_demo():
336
  # Connect the process button
337
  process_btn.click(
338
  fn=process_input,
339
- inputs=[image_input, goal_input],
340
  outputs=output_text
341
  )
342
 
@@ -349,7 +367,7 @@ def create_demo():
349
  for row_start in range(0, len(examples), 3):
350
  with gr.Row():
351
  for i in range(row_start, min(row_start + 3, len(examples))):
352
- image, goal = examples[i]
353
  with gr.Column(scale=1):
354
  episode_num = i + 1
355
  gr.Markdown(f"**Episode {episode_num}**")
@@ -365,12 +383,18 @@ def create_demo():
365
  lines=3,
366
  interactive=False
367
  )
 
 
 
 
 
 
368
  # Create a button to load this example
369
  load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
370
  load_example_btn.click(
371
- fn=lambda img, g: (img, g),
372
- inputs=[example_image, example_goal],
373
- outputs=[image_input, goal_input]
374
  )
375
  except Exception as e:
376
  logger.warning(f"Failed to load examples: {str(e)}")
 
157
  # Initialize demo
158
  demo_instance = LOperatorDemo()
159
 
160
+ def process_input(image, goal, step_instructions):
161
  """Process the input and generate action"""
162
  if image is None:
163
  return "❌ Please upload an Android screenshot image."
 
165
  if not goal.strip():
166
  return "❌ Please provide a goal."
167
 
168
+ if not step_instructions.strip():
169
+ return "❌ Please provide step instructions."
170
+
171
  if not demo_instance.is_loaded:
172
  return "❌ Model not loaded. Please wait for it to load automatically."
173
 
 
192
  if pil_image.mode != "RGB":
193
  pil_image = pil_image.convert("RGB")
194
 
195
+ # Generate action using goal and step instructions
196
+ response = demo_instance.generate_action(pil_image, goal, step_instructions)
197
  return response
198
 
199
  except Exception as e:
 
240
  episode_num = episode_dir.split('_')[1]
241
  goal_text = metadata.get('goal', f'Episode {episode_num} example')
242
 
243
+ # Get step instruction for the corresponding screenshot
244
+ step_instructions = metadata.get('step_instructions', [])
245
+ step_instruction = ""
246
+ if step_instructions and screenshot_num <= len(step_instructions):
247
+ step_instruction = step_instructions[screenshot_num - 1]
248
+
249
  logger.info(f"Episode {episode_num} goal: {goal_text}")
250
+ logger.info(f"Episode {episode_num} step instruction: {step_instruction}")
251
 
252
  examples.append([
253
  pil_image, # Use PIL Image object directly
254
+ goal_text, # Use the goal text from metadata
255
+ step_instruction # Use the step instruction for this screenshot
256
  ])
257
  logger.info(f"Successfully loaded example for Episode {episode_num}")
258
 
 
331
  lines=3
332
  )
333
 
334
+ gr.Markdown("### 📝 Step Instructions")
335
+ step_instructions_input = gr.Textbox(
336
+ label="Specific step instruction for this screenshot",
337
+ placeholder="e.g., Tap on the Settings icon to open the app",
338
+ lines=2
339
+ )
340
+
341
  # Process button
342
  process_btn = gr.Button("🚀 Generate Action", variant="primary", size="lg")
343
 
 
354
  # Connect the process button
355
  process_btn.click(
356
  fn=process_input,
357
+ inputs=[image_input, goal_input, step_instructions_input],
358
  outputs=output_text
359
  )
360
 
 
367
  for row_start in range(0, len(examples), 3):
368
  with gr.Row():
369
  for i in range(row_start, min(row_start + 3, len(examples))):
370
+ image, goal, step_instruction = examples[i]
371
  with gr.Column(scale=1):
372
  episode_num = i + 1
373
  gr.Markdown(f"**Episode {episode_num}**")
 
383
  lines=3,
384
  interactive=False
385
  )
386
+ example_step_instruction = gr.Textbox(
387
+ value=step_instruction,
388
+ label="Step Instruction",
389
+ lines=2,
390
+ interactive=False
391
+ )
392
  # Create a button to load this example
393
  load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
394
  load_example_btn.click(
395
+ fn=lambda img, g, s: (img, g, s),
396
+ inputs=[example_image, example_goal, example_step_instruction],
397
+ outputs=[image_input, goal_input, step_instructions_input]
398
  )
399
  except Exception as e:
400
  logger.warning(f"Failed to load examples: {str(e)}")