Spaces:

Tonic
/

l-operator-demo

Running on Zero

App Files Files Community

Joseph Pollack committed on Aug 31

Commit

a0c936d

unverified ·

1 Parent(s): 1c19049

adds step instructions

Browse files

Files changed (1) hide show

app.py +33 -9

app.py CHANGED Viewed

@@ -157,7 +157,7 @@ class LOperatorDemo:
 # Initialize demo
 demo_instance = LOperatorDemo()
-def process_input(image, goal):
     """Process the input and generate action"""
     if image is None:
         return "❌ Please upload an Android screenshot image."
@@ -165,6 +165,9 @@ def process_input(image, goal):
     if not goal.strip():
         return "❌ Please provide a goal."
     if not demo_instance.is_loaded:
         return "❌ Model not loaded. Please wait for it to load automatically."
@@ -189,8 +192,8 @@ def process_input(image, goal):
         if pil_image.mode != "RGB":
             pil_image = pil_image.convert("RGB")
-        # Generate action using goal as both goal and instruction
-        response = demo_instance.generate_action(pil_image, goal, goal)
         return response
     except Exception as e:
@@ -237,11 +240,19 @@ def load_example_episodes():
                     episode_num = episode_dir.split('_')[1]
                     goal_text = metadata.get('goal', f'Episode {episode_num} example')
                     logger.info(f"Episode {episode_num} goal: {goal_text}")
                     examples.append([
                         pil_image,  # Use PIL Image object directly
-                        goal_text  # Use the goal text from metadata
                     ])
                     logger.info(f"Successfully loaded example for Episode {episode_num}")
@@ -320,6 +331,13 @@ def create_demo():
                     lines=3
                 )
                 # Process button
                 process_btn = gr.Button("🚀 Generate Action", variant="primary", size="lg")
@@ -336,7 +354,7 @@ def create_demo():
         # Connect the process button
         process_btn.click(
             fn=process_input,
-            inputs=[image_input, goal_input],
             outputs=output_text
         )
@@ -349,7 +367,7 @@ def create_demo():
                 for row_start in range(0, len(examples), 3):
                     with gr.Row():
                         for i in range(row_start, min(row_start + 3, len(examples))):
-                            image, goal = examples[i]
                             with gr.Column(scale=1):
                                 episode_num = i + 1
                                 gr.Markdown(f"**Episode {episode_num}**")
@@ -365,12 +383,18 @@ def create_demo():
                                     lines=3,
                                     interactive=False
                                 )
                                 # Create a button to load this example
                                 load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
                                 load_example_btn.click(
-                                    fn=lambda img, g: (img, g),
-                                    inputs=[example_image, example_goal],
-                                    outputs=[image_input, goal_input]
                                 )
         except Exception as e:
             logger.warning(f"Failed to load examples: {str(e)}")

 # Initialize demo
 demo_instance = LOperatorDemo()
+def process_input(image, goal, step_instructions):
     """Process the input and generate action"""
     if image is None:
         return "❌ Please upload an Android screenshot image."
     if not goal.strip():
         return "❌ Please provide a goal."
+    if not step_instructions.strip():
+        return "❌ Please provide step instructions."
     if not demo_instance.is_loaded:
         return "❌ Model not loaded. Please wait for it to load automatically."
         if pil_image.mode != "RGB":
             pil_image = pil_image.convert("RGB")
+        # Generate action using goal and step instructions
+        response = demo_instance.generate_action(pil_image, goal, step_instructions)
         return response
     except Exception as e:
                     episode_num = episode_dir.split('_')[1]
                     goal_text = metadata.get('goal', f'Episode {episode_num} example')
+                    # Get step instruction for the corresponding screenshot
+                    step_instructions = metadata.get('step_instructions', [])
+                    step_instruction = ""
+                    if step_instructions and screenshot_num <= len(step_instructions):
+                        step_instruction = step_instructions[screenshot_num - 1]
                     logger.info(f"Episode {episode_num} goal: {goal_text}")
+                    logger.info(f"Episode {episode_num} step instruction: {step_instruction}")
                     examples.append([
                         pil_image,  # Use PIL Image object directly
+                        goal_text,  # Use the goal text from metadata
+                        step_instruction  # Use the step instruction for this screenshot
                     ])
                     logger.info(f"Successfully loaded example for Episode {episode_num}")
                     lines=3
                 )
+                gr.Markdown("### 📝 Step Instructions")
+                step_instructions_input = gr.Textbox(
+                    label="Specific step instruction for this screenshot",
+                    placeholder="e.g., Tap on the Settings icon to open the app",
+                    lines=2
+                )
                 # Process button
                 process_btn = gr.Button("🚀 Generate Action", variant="primary", size="lg")
         # Connect the process button
         process_btn.click(
             fn=process_input,
+            inputs=[image_input, goal_input, step_instructions_input],
             outputs=output_text
         )
                 for row_start in range(0, len(examples), 3):
                     with gr.Row():
                         for i in range(row_start, min(row_start + 3, len(examples))):
+                            image, goal, step_instruction = examples[i]
                             with gr.Column(scale=1):
                                 episode_num = i + 1
                                 gr.Markdown(f"**Episode {episode_num}**")
                                     lines=3,
                                     interactive=False
                                 )
+                                example_step_instruction = gr.Textbox(
+                                    value=step_instruction,
+                                    label="Step Instruction",
+                                    lines=2,
+                                    interactive=False
+                                )
                                 # Create a button to load this example
                                 load_example_btn = gr.Button(f"Load Example {episode_num}", size="sm")
                                 load_example_btn.click(
+                                    fn=lambda img, g, s: (img, g, s),
+                                    inputs=[example_image, example_goal, example_step_instruction],
+                                    outputs=[image_input, goal_input, step_instructions_input]
                                 )
         except Exception as e:
             logger.warning(f"Failed to load examples: {str(e)}")