|
|
| |
# Instruction templates for web-page element grounding: the user describes
# elements of a screenshot in text and asks for their locations.
# Every entry carries the "(with bbox)" marker and has no "{}" placeholder.
# The wording mirrors web_loca_all_point_prompt entry for entry; only the
# marker differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably one template is chosen per sample and the marker
# selects the expected answer format - confirm against the consumer.
| web_loca_all_bbox_prompt = [ |
| "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions (with bbox).", |
| "Based on the screenshot of the page, I give a text description and you give its corresponding location (with bbox).", |
| "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click (with bbox).", |
| "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element (with bbox).", |
| "Please identify the coordinates of the webpage elements I describe based on the provided screenshot (with bbox).", |
| "Given a screenshot, I will describe specific elements; your task is to predict their locations (with bbox).", |
| "Using the image of this webpage, can you determine the coordinates of the elements I describe (with bbox)?", |
| "In this webpage capture, I will describe certain elements. Please locate them for me (with bbox).", |
| "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates (with bbox)?", |
| "From the given webpage screenshot, I need you to identify the locations of described elements (with bbox).", |
| "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations (with bbox).", |
| "For the elements I describe in this page capture, can you predict their positions (with bbox)?", |
| "I will describe elements from a webpage screenshot; your role is to locate them (with bbox).", |
| "Using the attached screenshot of a webpage, please find the coordinates of described elements (with bbox).", |
| "From the image of this webpage, I will describe elements for you to locate (with bbox).", |
| "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot (with bbox).", |
| "On this webpage screenshot, I will point out elements; please predict their exact coordinates (with bbox).", |
| "In this web page image, please locate the elements as I describe them (with bbox).", |
| "Given this screenshot of a webpage, I'll describe some elements; locate them for me (with bbox).", |
| "Please use the provided webpage screenshot to locate the elements I describe (with bbox).", |
| "In the provided web page image, I'll describe specific elements. Identify their locations, please (with bbox).", |
| "With this screenshot of a webpage, can you locate the elements I describe (with bbox)?", |
| "I will describe features on this webpage screenshot; please predict their positions (with bbox).", |
| "Using the screenshot of this webpage, identify the coordinates of elements I describe (with bbox).", |
| "On this webpage capture, I'll point out specific elements for you to locate (with bbox).", |
| "Please determine the location of elements I describe in this webpage screenshot (with bbox).", |
| "I'll describe certain elements on this webpage image; your task is to find their locations (with bbox).", |
| "Using this webpage screenshot, I'll describe some elements. Please locate them (with bbox).", |
| "Based on my descriptions, find the locations of elements in this webpage screenshot (with bbox).", |
| "In this web page capture, please predict the positions of elements I describe (with bbox).", |
| "I'll give textual clues about elements in this webpage screenshot; identify their coordinates (with bbox).", |
| "Using the provided screenshot, I'll describe webpage elements for you to locate (with bbox).", |
| "From this webpage image, I will describe specific elements. Please predict their exact locations (with bbox)." |
| ] |
|
|
| |
# Instruction templates for web-page element grounding: the user describes
# elements of a screenshot in text and asks for their locations.
# Every entry carries the "(with point)" marker and has no "{}" placeholder.
# The wording mirrors web_loca_all_bbox_prompt entry for entry; only the
# marker differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably one template is chosen per sample and the marker
# selects the expected answer format - confirm against the consumer.
| web_loca_all_point_prompt = [ |
| "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions (with point).", |
| "Based on the screenshot of the page, I give a text description and you give its corresponding location (with point).", |
| "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click (with point).", |
| "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element (with point).", |
| "Please identify the coordinates of the webpage elements I describe based on the provided screenshot (with point).", |
| "Given a screenshot, I will describe specific elements; your task is to predict their locations (with point).", |
| "Using the image of this webpage, can you determine the coordinates of the elements I describe (with point)?", |
| "In this webpage capture, I will describe certain elements. Please locate them for me (with point).", |
| "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates (with point)?", |
| "From the given webpage screenshot, I need you to identify the locations of described elements (with point).", |
| "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations (with point).", |
| "For the elements I describe in this page capture, can you predict their positions (with point)?", |
| "I will describe elements from a webpage screenshot; your role is to locate them (with point).", |
| "Using the attached screenshot of a webpage, please find the coordinates of described elements (with point).", |
| "From the image of this webpage, I will describe elements for you to locate (with point).", |
| "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot (with point).", |
| "On this webpage screenshot, I will point out elements; please predict their exact coordinates (with point).", |
| "In this web page image, please locate the elements as I describe them (with point).", |
| "Given this screenshot of a webpage, I'll describe some elements; locate them for me (with point).", |
| "Please use the provided webpage screenshot to locate the elements I describe (with point).", |
| "In the provided web page image, I'll describe specific elements. Identify their locations, please (with point).", |
| "With this screenshot of a webpage, can you locate the elements I describe (with point)?", |
| "I will describe features on this webpage screenshot; please predict their positions (with point).", |
| "Using the screenshot of this webpage, identify the coordinates of elements I describe (with point).", |
| "On this webpage capture, I'll point out specific elements for you to locate (with point).", |
| "Please determine the location of elements I describe in this webpage screenshot (with point).", |
| "I'll describe certain elements on this webpage image; your task is to find their locations (with point).", |
| "Using this webpage screenshot, I'll describe some elements. Please locate them (with point).", |
| "Based on my descriptions, find the locations of elements in this webpage screenshot (with point).", |
| "In this web page capture, please predict the positions of elements I describe (with point).", |
| "I'll give textual clues about elements in this webpage screenshot; identify their coordinates (with point).", |
| "Using the provided screenshot, I'll describe webpage elements for you to locate (with point).", |
| "From this webpage image, I will describe specific elements. Please predict their exact locations (with point)." |
| ] |
|
|
| |
# Instruction templates for the reverse of grounding on web pages: the user
# supplies coordinates in a screenshot and asks for the text of the element
# found there.
# Every entry carries the "(with bbox)" marker and has no "{}" placeholder.
# The wording mirrors web_ocr_all_point_prompt entry for entry; only the
# marker differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably the marker states how the supplied coordinates are
# encoded (box rather than point) - confirm against the consumer.
| web_ocr_all_bbox_prompt = [ |
| "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element (with bbox).", |
| "In the image above, I give a series of coordinates and ask you to describe the corresponding elements (with bbox).", |
| "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates (with bbox).", |
| "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations (with bbox).", |
| "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there (with bbox).", |
| "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements (with bbox).", |
| "With this webpage capture, I provide a series of coordinates; please identify the text content of each element (with bbox).", |
| "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements (with bbox).", |
| "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points (with bbox)?", |
| "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots (with bbox).", |
| "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there (with bbox).", |
| "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations (with bbox).", |
| "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points (with bbox)?", |
| "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there (with bbox).", |
| "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations (with bbox).", |
| "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements (with bbox)?", |
| "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements (with bbox).", |
| "Given this webpage capture, I'll specify locations; please predict the text content of the elements there (with bbox).", |
| "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements (with bbox).", |
| "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points (with bbox)?", |
| "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there (with bbox).", |
| "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements (with bbox).", |
| "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements (with bbox).", |
| "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations (with bbox)?", |
| "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there (with bbox).", |
| "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements (with bbox).", |
| "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements (with bbox).", |
| "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements (with bbox)?", |
| "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations (with bbox).", |
| "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there (with bbox).", |
| "Using the provided page image, I'll point to locations; you predict the text content of the elements (with bbox).", |
| "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements (with bbox)?", |
| "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points (with bbox)." |
| ] |
|
|
| |
# Instruction templates for the reverse of grounding on web pages: the user
# supplies coordinates in a screenshot and asks for the text of the element
# found there.
# Every entry carries the "(with point)" marker and has no "{}" placeholder.
# The wording mirrors web_ocr_all_bbox_prompt entry for entry; only the
# marker differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably the marker states how the supplied coordinates are
# encoded (point rather than box) - confirm against the consumer.
| web_ocr_all_point_prompt = [ |
| "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element (with point).", |
| "In the image above, I give a series of coordinates and ask you to describe the corresponding elements (with point).", |
| "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates (with point).", |
| "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations (with point).", |
| "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there (with point).", |
| "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements (with point).", |
| "With this webpage capture, I provide a series of coordinates; please identify the text content of each element (with point).", |
| "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements (with point).", |
| "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points (with point)?", |
| "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots (with point).", |
| "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there (with point).", |
| "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations (with point).", |
| "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points (with point)?", |
| "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there (with point).", |
| "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations (with point).", |
| "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements (with point)?", |
| "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements (with point).", |
| "Given this webpage capture, I'll specify locations; please predict the text content of the elements there (with point).", |
| "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements (with point).", |
| "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points (with point)?", |
| "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there (with point).", |
| "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements (with point).", |
| "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements (with point).", |
| "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations (with point)?", |
| "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there (with point).", |
| "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements (with point).", |
| "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements (with point).", |
| "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements (with point)?", |
| "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations (with point).", |
| "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there (with point).", |
| "Using the provided page image, I'll point to locations; you predict the text content of the elements (with point).", |
| "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements (with point)?", |
| "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points (with point)." |
| ] |
|
|
| |
# Question templates for locating the UI element that fulfils one instruction.
# Each entry contains exactly one "{}" placeholder for the instruction text and
# ends with the "(with bbox)" marker. Some entries wrap the placeholder in
# escaped double quotes and others leave it bare, so the substituted text must
# read naturally both ways.
# The wording mirrors loca_point_prompt entry for entry; only the marker
# differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably the placeholder is filled by positional string
# formatting - confirm; a literal brace in an entry would then need escaping.
# NOTE(review): several entries have wording problems that were left untouched
# because changing them alters the prompt text seen by every consumer:
#   - "what is the location of the button do I press" (ungrammatical)
#   - "what is the coordinates of the element is related to this" (ungrammatical)
#   - "where do I access to" (ungrammatical)
#   - "where is the upload button" names a specific control regardless of the
#     instruction that is substituted in.
| loca_bbox_prompt = [ |
| "In this UI screenshot, what is the position of the element corresponding to the command \"{}\" (with bbox)?", |
| "In the UI, where should I click if I want to complete instruction \"{}\" (with bbox)?", |
| "In this screen, how can I navigate to the section that says \"{}\" (with bbox)?", |
| "On this page, what is the location of the button do I press to follow the command \"{}\" (with bbox)?", |
| "For the action described as \"{}\", where is the corresponding icon in this UI (with bbox)?", |
| "To execute the function \"{}\", which item in the UI should I select (in coordinates) (with bbox)?", |
| "In this UI layout, where is the tool that performs the operation \"{}\" (with bbox)?", |
| "On this screen, where can I find the feature that allows me to \"{}\" (with bbox)?", |
| "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates) (with bbox)?", |
| "Within this dashboard, which widget should I interact with to \"{}\" (with bbox)?", |
| "In the UI here, I need to {}, what is the coordinates of the element is related to this (with bbox)?", |
| "If my goal is to \"{}\", which control in this interface should I use (with bbox)?", |
| "On this device screen, to achieve the outcome \"{}\", where do I tap (with bbox)?", |
| "Facing this interface, where do I access to \"{}\" (with bbox)?", |
| "In this digital interface, to initiate \"{}\", where is my point of interest (with bbox)?", |
| "When using this app, for the function \"{}\", where is the command located (with bbox)?", |
| "In this UI design, to process the instruction \"{}\", where should I activate (with bbox)?", |
| "Within this graphical user interface, to \"{}\", which icon should I be looking for (with bbox)?", |
| "On this web page, to perform \"{}\", where is the link or button I will click (with bbox)?", |
| "In this interface snapshot, to begin \"{}\", what is the clicking point (with bbox)?", |
| "When interacting with this UI, for the operation labeled \"{}\", what is my target (with bbox)?", |
| "On this software's interface, to execute the step \"{}\", where do I direct my attention (with bbox)?", |
| "In the current UI, I want to {}, where should I click (with bbox)?", |
| "In this image, I want to {}, where should I click on (with bbox)?", |
| "In the current UI, to {}, where should I click (with bbox)?", |
| "In this image, to {}, where should I click on (with bbox)?", |
| "On this screen, I need to {}, where do I click (with bbox)?", |
| "In the UI right now, to {}, where should I click (with bbox)?", |
| "In this layout, I want to {}, where is the upload button (with bbox)?", |
| "On this interface, to {}, where should I click (with bbox)?", |
| "In this view, I need to {}, which icon do I select (in coordinates) (with bbox)?", |
| "On this page, I want to {}, where is the option (with bbox)?", |
| "In this webpage, I'm trying to {}, where do I click (with bbox)?", |
| "In this software, to {}, where should I navigate (with bbox)?" |
| ] |
|
|
| |
# Question templates for locating the UI element that fulfils one instruction.
# Each entry contains exactly one "{}" placeholder for the instruction text and
# ends with the "(with point)" marker. Some entries wrap the placeholder in
# escaped double quotes and others leave it bare, so the substituted text must
# read naturally both ways.
# The wording mirrors loca_bbox_prompt entry for entry; only the marker
# differs, so keep the two lists in sync when editing either one.
# NOTE(review): presumably the placeholder is filled by positional string
# formatting - confirm; a literal brace in an entry would then need escaping.
# NOTE(review): several entries have wording problems that were left untouched
# because changing them alters the prompt text seen by every consumer:
#   - "what is the location of the button do I press" (ungrammatical)
#   - "what is the coordinates of the element is related to this" (ungrammatical)
#   - "where do I access to" (ungrammatical)
#   - "where is the upload button" names a specific control regardless of the
#     instruction that is substituted in.
| loca_point_prompt = [ |
| "In this UI screenshot, what is the position of the element corresponding to the command \"{}\" (with point)?", |
| "In the UI, where should I click if I want to complete instruction \"{}\" (with point)?", |
| "In this screen, how can I navigate to the section that says \"{}\" (with point)?", |
| "On this page, what is the location of the button do I press to follow the command \"{}\" (with point)?", |
| "For the action described as \"{}\", where is the corresponding icon in this UI (with point)?", |
| "To execute the function \"{}\", which item in the UI should I select (in coordinates) (with point)?", |
| "In this UI layout, where is the tool that performs the operation \"{}\" (with point)?", |
| "On this screen, where can I find the feature that allows me to \"{}\" (with point)?", |
| "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates) (with point)?", |
| "Within this dashboard, which widget should I interact with to \"{}\" (with point)?", |
| "In the UI here, I need to {}, what is the coordinates of the element is related to this (with point)?", |
| "If my goal is to \"{}\", which control in this interface should I use (with point)?", |
| "On this device screen, to achieve the outcome \"{}\", where do I tap (with point)?", |
| "Facing this interface, where do I access to \"{}\" (with point)?", |
| "In this digital interface, to initiate \"{}\", where is my point of interest (with point)?", |
| "When using this app, for the function \"{}\", where is the command located (with point)?", |
| "In this UI design, to process the instruction \"{}\", where should I activate (with point)?", |
| "Within this graphical user interface, to \"{}\", which icon should I be looking for (with point)?", |
| "On this web page, to perform \"{}\", where is the link or button I will click (with point)?", |
| "In this interface snapshot, to begin \"{}\", what is the clicking point (with point)?", |
| "When interacting with this UI, for the operation labeled \"{}\", what is my target (with point)?", |
| "On this software's interface, to execute the step \"{}\", where do I direct my attention (with point)?", |
| "In the current UI, I want to {}, where should I click (with point)?", |
| "In this image, I want to {}, where should I click on (with point)?", |
| "In the current UI, to {}, where should I click (with point)?", |
| "In this image, to {}, where should I click on (with point)?", |
| "On this screen, I need to {}, where do I click (with point)?", |
| "In the UI right now, to {}, where should I click (with point)?", |
| "In this layout, I want to {}, where is the upload button (with point)?", |
| "On this interface, to {}, where should I click (with point)?", |
| "In this view, I need to {}, which icon do I select (in coordinates) (with point)?", |
| "On this page, I want to {}, where is the option (with point)?", |
| "In this webpage, I'm trying to {}, where do I click (with point)?", |
| "In this software, to {}, where should I navigate (with point)?" |
| ] |
|
|
| |
# Instruction templates asking for a free-text description of an interface
# screenshot. Entries are complete sentences: they contain no "{}" placeholder
# and, unlike the grounding lists in this file, no "(with bbox)" or
# "(with point)" marker.
# NOTE(review): presumably interchangeable paraphrases of one task, with one
# picked per sample - confirm against the consumer.
| screen_caption_prompt = [ |
| "Can you provide a detailed description of the interface screenshot shown?", |
| "Illustrate the details visible in the provided screenshot.", |
| "What does the presented screen image depict?", |
| "How would you narrate the contents of this screen capture to someone who can't see it?", |
| "Please detail the elements shown in the interface screenshot.", |
| "Describe the features and information displayed in this screenshot.", |
| "Elaborate on what is visible in the screenshot of the interface.", |
| "Give a comprehensive description of the screenshot's interface.", |
| "What information is conveyed in the screenshot displayed?", |
| "Could you depict the content and layout of the screen image provided?", |
| "Explain the visual aspects of the screenshot taken from this interface.", |
| "How would you verbally depict the interface shown in the screenshot?", |
| "What key elements are shown in this interface screenshot?", |
| "Provide a verbal representation of the screenshot's content.", |
| "Narrate the components and information visible in this interface capture.", |
| "What are the main features displayed in the screenshot of this screen?", |
| "Outline the specific details shown in the interface image.", |
| "How would you describe this screen image to someone who cannot see it?", |
| "Enumerate the elements and information present in the provided interface screenshot.", |
| "Detail the visual composition of the screen capture you see." |
| ] |
|
|
| |
# Question templates asking what the UI element at a given location is or
# does. Each entry contains exactly one bare "{}" placeholder standing where a
# location is expected ("the element at {}", "position {}", "point {}").
# Entries carry no "(with bbox)" or "(with point)" marker.
# NOTE(review): the textual format of the substituted location (point vs. box,
# pixel vs. normalised values) is not defined here - confirm against the
# consumer. Presumably filled by positional string formatting.
| widgetcap_prompt = [ |
| "Please generate a description for the element at {}.", |
| "Describe the function of the element at {} on the screen.", |
| "What is the function of the element at {} on the UI?", |
| "What happens when you tap position {} on the screen?", |
| "What happens when you click point {} on the screen?", |
| "Can you explain what the user interface element at {} does?", |
| "What action is triggered by interacting with the area at {}?", |
| "Explain the purpose of the interactive element found at {}.", |
| "What feature is accessed by selecting the location at {}?", |
| "Identify and describe the component located at {}.", |
| "What is the outcome of selecting the element at {}?", |
| "Detail the functionality of the UI element positioned at {}.", |
| "What is the significance of the element located at {} in the application?", |
| "How does the element at {} contribute to the overall user experience?", |
| "What kind of input or interaction is expected at the point marked {}?" |
| ] |