# data/seeclick_task_prompts.py
# Uploaded by cjfcsjt with huggingface_hub (commit 0a71441, verified).
# Web-page grounding instructions, bounding-box variant.
# Every entry asks the model to locate the elements the user describes in a
# webpage screenshot; the entries differ only in wording. The "(with bbox)"
# output-format tag sits right before the closing punctuation of each entry,
# so it is written once here and substituted into the "%s" slot.
# (How an entry is picked is up to the caller -- presumably sampled at random; confirm.)
web_loca_all_bbox_prompt = [
    template % "(with bbox)"
    for template in (
        "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions %s.",
        "Based on the screenshot of the page, I give a text description and you give its corresponding location %s.",
        "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click %s.",
        "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element %s.",
        "Please identify the coordinates of the webpage elements I describe based on the provided screenshot %s.",
        "Given a screenshot, I will describe specific elements; your task is to predict their locations %s.",
        "Using the image of this webpage, can you determine the coordinates of the elements I describe %s?",
        "In this webpage capture, I will describe certain elements. Please locate them for me %s.",
        "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates %s?",
        "From the given webpage screenshot, I need you to identify the locations of described elements %s.",
        "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations %s.",
        "For the elements I describe in this page capture, can you predict their positions %s?",
        "I will describe elements from a webpage screenshot; your role is to locate them %s.",
        "Using the attached screenshot of a webpage, please find the coordinates of described elements %s.",
        "From the image of this webpage, I will describe elements for you to locate %s.",
        "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot %s.",
        "On this webpage screenshot, I will point out elements; please predict their exact coordinates %s.",
        "In this web page image, please locate the elements as I describe them %s.",
        "Given this screenshot of a webpage, I'll describe some elements; locate them for me %s.",
        "Please use the provided webpage screenshot to locate the elements I describe %s.",
        "In the provided web page image, I'll describe specific elements. Identify their locations, please %s.",
        "With this screenshot of a webpage, can you locate the elements I describe %s?",
        "I will describe features on this webpage screenshot; please predict their positions %s.",
        "Using the screenshot of this webpage, identify the coordinates of elements I describe %s.",
        "On this webpage capture, I'll point out specific elements for you to locate %s.",
        "Please determine the location of elements I describe in this webpage screenshot %s.",
        "I'll describe certain elements on this webpage image; your task is to find their locations %s.",
        "Using this webpage screenshot, I'll describe some elements. Please locate them %s.",
        "Based on my descriptions, find the locations of elements in this webpage screenshot %s.",
        "In this web page capture, please predict the positions of elements I describe %s.",
        "I'll give textual clues about elements in this webpage screenshot; identify their coordinates %s.",
        "Using the provided screenshot, I'll describe webpage elements for you to locate %s.",
        "From this webpage image, I will describe specific elements. Please predict their exact locations %s.",
    )
]
# Web-page grounding instructions, point variant.
# Every entry asks the model to locate the elements the user describes in a
# webpage screenshot; the entries differ only in wording. The "(with point)"
# output-format tag sits right before the closing punctuation of each entry,
# so it is written once here and substituted into the "%s" slot.
# (How an entry is picked is up to the caller -- presumably sampled at random; confirm.)
web_loca_all_point_prompt = [
    template % "(with point)"
    for template in (
        "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions %s.",
        "Based on the screenshot of the page, I give a text description and you give its corresponding location %s.",
        "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click %s.",
        "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element %s.",
        "Please identify the coordinates of the webpage elements I describe based on the provided screenshot %s.",
        "Given a screenshot, I will describe specific elements; your task is to predict their locations %s.",
        "Using the image of this webpage, can you determine the coordinates of the elements I describe %s?",
        "In this webpage capture, I will describe certain elements. Please locate them for me %s.",
        "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates %s?",
        "From the given webpage screenshot, I need you to identify the locations of described elements %s.",
        "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations %s.",
        "For the elements I describe in this page capture, can you predict their positions %s?",
        "I will describe elements from a webpage screenshot; your role is to locate them %s.",
        "Using the attached screenshot of a webpage, please find the coordinates of described elements %s.",
        "From the image of this webpage, I will describe elements for you to locate %s.",
        "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot %s.",
        "On this webpage screenshot, I will point out elements; please predict their exact coordinates %s.",
        "In this web page image, please locate the elements as I describe them %s.",
        "Given this screenshot of a webpage, I'll describe some elements; locate them for me %s.",
        "Please use the provided webpage screenshot to locate the elements I describe %s.",
        "In the provided web page image, I'll describe specific elements. Identify their locations, please %s.",
        "With this screenshot of a webpage, can you locate the elements I describe %s?",
        "I will describe features on this webpage screenshot; please predict their positions %s.",
        "Using the screenshot of this webpage, identify the coordinates of elements I describe %s.",
        "On this webpage capture, I'll point out specific elements for you to locate %s.",
        "Please determine the location of elements I describe in this webpage screenshot %s.",
        "I'll describe certain elements on this webpage image; your task is to find their locations %s.",
        "Using this webpage screenshot, I'll describe some elements. Please locate them %s.",
        "Based on my descriptions, find the locations of elements in this webpage screenshot %s.",
        "In this web page capture, please predict the positions of elements I describe %s.",
        "I'll give textual clues about elements in this webpage screenshot; identify their coordinates %s.",
        "Using the provided screenshot, I'll describe webpage elements for you to locate %s.",
        "From this webpage image, I will describe specific elements. Please predict their exact locations %s.",
    )
]
# Web-page text-reading (OCR) instructions, bounding-box variant.
# Every entry tells the model it will be given locations in a webpage
# screenshot and must answer with the text of the element found there; the
# entries differ only in wording. The "(with bbox)" tag precedes the closing
# punctuation of each entry, so it is written once and substituted into "%s".
# (How an entry is picked is up to the caller -- presumably sampled at random; confirm.)
web_ocr_all_bbox_prompt = [
    template % "(with bbox)"
    for template in (
        "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element %s.",
        "In the image above, I give a series of coordinates and ask you to describe the corresponding elements %s.",
        "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates %s.",
        "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations %s.",
        "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there %s.",
        "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements %s.",
        "With this webpage capture, I provide a series of coordinates; please identify the text content of each element %s.",
        "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements %s.",
        "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points %s?",
        "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots %s.",
        "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there %s.",
        "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations %s.",
        "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points %s?",
        "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there %s.",
        "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations %s.",
        "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements %s?",
        "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements %s.",
        "Given this webpage capture, I'll specify locations; please predict the text content of the elements there %s.",
        "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements %s.",
        "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points %s?",
        "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there %s.",
        "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements %s.",
        "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements %s.",
        "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations %s?",
        "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there %s.",
        "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements %s.",
        "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements %s.",
        "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements %s?",
        "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations %s.",
        "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there %s.",
        "Using the provided page image, I'll point to locations; you predict the text content of the elements %s.",
        "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements %s?",
        "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points %s.",
    )
]
# Web-page text-reading (OCR) instructions, point variant.
# Every entry tells the model it will be given locations in a webpage
# screenshot and must answer with the text of the element found there; the
# entries differ only in wording. The "(with point)" tag precedes the closing
# punctuation of each entry, so it is written once and substituted into "%s".
# (How an entry is picked is up to the caller -- presumably sampled at random; confirm.)
web_ocr_all_point_prompt = [
    template % "(with point)"
    for template in (
        "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element %s.",
        "In the image above, I give a series of coordinates and ask you to describe the corresponding elements %s.",
        "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates %s.",
        "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations %s.",
        "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there %s.",
        "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements %s.",
        "With this webpage capture, I provide a series of coordinates; please identify the text content of each element %s.",
        "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements %s.",
        "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points %s?",
        "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots %s.",
        "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there %s.",
        "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations %s.",
        "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points %s?",
        "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there %s.",
        "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations %s.",
        "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements %s?",
        "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements %s.",
        "Given this webpage capture, I'll specify locations; please predict the text content of the elements there %s.",
        "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements %s.",
        "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points %s?",
        "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there %s.",
        "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements %s.",
        "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements %s.",
        "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations %s?",
        "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there %s.",
        "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements %s.",
        "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements %s.",
        "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements %s?",
        "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations %s.",
        "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there %s.",
        "Using the provided page image, I'll point to locations; you predict the text content of the elements %s.",
        "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements %s?",
        "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points %s.",
    )
]
# Single-element grounding questions, bounding-box variant.
# Each entry is a template with exactly one "{}" slot (presumably filled with
# the user's instruction via str.format -- confirm against the caller) and
# every entry ends with the same " (with bbox)?" tail, which is appended here
# instead of being repeated on every line.
loca_bbox_prompt = [
    question + " (with bbox)?"
    for question in (
        "In this UI screenshot, what is the position of the element corresponding to the command \"{}\"",
        "In the UI, where should I click if I want to complete instruction \"{}\"",
        "In this screen, how can I navigate to the section that says \"{}\"",
        # NOTE(review): "the button do I press" is ungrammatical; kept verbatim because this text is runtime data.
        "On this page, what is the location of the button do I press to follow the command \"{}\"",
        "For the action described as \"{}\", where is the corresponding icon in this UI",
        "To execute the function \"{}\", which item in the UI should I select (in coordinates)",
        "In this UI layout, where is the tool that performs the operation \"{}\"",
        "On this screen, where can I find the feature that allows me to \"{}\"",
        "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates)",
        "Within this dashboard, which widget should I interact with to \"{}\"",
        # NOTE(review): "what is the coordinates of the element is related to this" is ungrammatical; kept verbatim.
        "In the UI here, I need to {}, what is the coordinates of the element is related to this",
        "If my goal is to \"{}\", which control in this interface should I use",
        "On this device screen, to achieve the outcome \"{}\", where do I tap",
        # NOTE(review): "access to" reads oddly; kept verbatim.
        "Facing this interface, where do I access to \"{}\"",
        "In this digital interface, to initiate \"{}\", where is my point of interest",
        "When using this app, for the function \"{}\", where is the command located",
        "In this UI design, to process the instruction \"{}\", where should I activate",
        "Within this graphical user interface, to \"{}\", which icon should I be looking for",
        "On this web page, to perform \"{}\", where is the link or button I will click",
        "In this interface snapshot, to begin \"{}\", what is the clicking point",
        "When interacting with this UI, for the operation labeled \"{}\", what is my target",
        "On this software's interface, to execute the step \"{}\", where do I direct my attention",
        "In the current UI, I want to {}, where should I click",
        "In this image, I want to {}, where should I click on",
        "In the current UI, to {}, where should I click",
        "In this image, to {}, where should I click on",
        "On this screen, I need to {}, where do I click",
        "In the UI right now, to {}, where should I click",
        # NOTE(review): this template hard-codes "upload button" regardless of the instruction; kept verbatim, confirm intent.
        "In this layout, I want to {}, where is the upload button",
        "On this interface, to {}, where should I click",
        "In this view, I need to {}, which icon do I select (in coordinates)",
        "On this page, I want to {}, where is the option",
        "In this webpage, I'm trying to {}, where do I click",
        "In this software, to {}, where should I navigate",
    )
]
# Single-element grounding questions, point variant.
# Each entry is a template with exactly one "{}" slot (presumably filled with
# the user's instruction via str.format -- confirm against the caller) and
# every entry ends with the same " (with point)?" tail, which is appended here
# instead of being repeated on every line.
loca_point_prompt = [
    question + " (with point)?"
    for question in (
        "In this UI screenshot, what is the position of the element corresponding to the command \"{}\"",
        "In the UI, where should I click if I want to complete instruction \"{}\"",
        "In this screen, how can I navigate to the section that says \"{}\"",
        # NOTE(review): "the button do I press" is ungrammatical; kept verbatim because this text is runtime data.
        "On this page, what is the location of the button do I press to follow the command \"{}\"",
        "For the action described as \"{}\", where is the corresponding icon in this UI",
        "To execute the function \"{}\", which item in the UI should I select (in coordinates)",
        "In this UI layout, where is the tool that performs the operation \"{}\"",
        "On this screen, where can I find the feature that allows me to \"{}\"",
        "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates)",
        "Within this dashboard, which widget should I interact with to \"{}\"",
        # NOTE(review): "what is the coordinates of the element is related to this" is ungrammatical; kept verbatim.
        "In the UI here, I need to {}, what is the coordinates of the element is related to this",
        "If my goal is to \"{}\", which control in this interface should I use",
        "On this device screen, to achieve the outcome \"{}\", where do I tap",
        # NOTE(review): "access to" reads oddly; kept verbatim.
        "Facing this interface, where do I access to \"{}\"",
        "In this digital interface, to initiate \"{}\", where is my point of interest",
        "When using this app, for the function \"{}\", where is the command located",
        "In this UI design, to process the instruction \"{}\", where should I activate",
        "Within this graphical user interface, to \"{}\", which icon should I be looking for",
        "On this web page, to perform \"{}\", where is the link or button I will click",
        "In this interface snapshot, to begin \"{}\", what is the clicking point",
        "When interacting with this UI, for the operation labeled \"{}\", what is my target",
        "On this software's interface, to execute the step \"{}\", where do I direct my attention",
        "In the current UI, I want to {}, where should I click",
        "In this image, I want to {}, where should I click on",
        "In the current UI, to {}, where should I click",
        "In this image, to {}, where should I click on",
        "On this screen, I need to {}, where do I click",
        "In the UI right now, to {}, where should I click",
        # NOTE(review): this template hard-codes "upload button" regardless of the instruction; kept verbatim, confirm intent.
        "In this layout, I want to {}, where is the upload button",
        "On this interface, to {}, where should I click",
        "In this view, I need to {}, which icon do I select (in coordinates)",
        "On this page, I want to {}, where is the option",
        "In this webpage, I'm trying to {}, where do I click",
        "In this software, to {}, where should I navigate",
    )
]
# Screen-captioning requests: interchangeable ways of asking for a free-form
# description of a screenshot. None of them has a placeholder.
# The literal below holds one prompt per line; splitlines() turns it into the
# list, so keep every prompt on its own unindented line with no blank lines.
screen_caption_prompt = """\
Can you provide a detailed description of the interface screenshot shown?
Illustrate the details visible in the provided screenshot.
What does the presented screen image depict?
How would you narrate the contents of this screen capture to someone who can't see it?
Please detail the elements shown in the interface screenshot.
Describe the features and information displayed in this screenshot.
Elaborate on what is visible in the screenshot of the interface.
Give a comprehensive description of the screenshot's interface.
What information is conveyed in the screenshot displayed?
Could you depict the content and layout of the screen image provided?
Explain the visual aspects of the screenshot taken from this interface.
How would you verbally depict the interface shown in the screenshot?
What key elements are shown in this interface screenshot?
Provide a verbal representation of the screenshot's content.
Narrate the components and information visible in this interface capture.
What are the main features displayed in the screenshot of this screen?
Outline the specific details shown in the interface image.
How would you describe this screen image to someone who cannot see it?
Enumerate the elements and information present in the provided interface screenshot.
Detail the visual composition of the screen capture you see.
""".splitlines()
# Widget-captioning requests: each template asks what the UI element at a
# given screen location is or does. Every entry has exactly one "{}" slot for
# that location (its exact format is decided by the caller -- not visible here).
# The literal below holds one template per line; splitlines() turns it into
# the list, so keep every template on its own unindented line with no blank lines.
widgetcap_prompt = """\
Please generate a description for the element at {}.
Describe the function of the element at {} on the screen.
What is the function of the element at {} on the UI?
What happens when you tap position {} on the screen?
What happens when you click point {} on the screen?
Can you explain what the user interface element at {} does?
What action is triggered by interacting with the area at {}?
Explain the purpose of the interactive element found at {}.
What feature is accessed by selecting the location at {}?
Identify and describe the component located at {}.
What is the outcome of selecting the element at {}?
Detail the functionality of the UI element positioned at {}.
What is the significance of the element located at {} in the application?
How does the element at {} contribute to the overall user experience?
What kind of input or interaction is expected at the point marked {}?
""".splitlines()