"""Prompt templates for querying VLMs (Gemini / GPT) for pairwise preferences and scalar scores over environment images."""
import copy
# Declarative goal-state captions, one per environment: each sentence
# describes the scene as it should look when the task is solved
# (CLIP-style image/text matching prompts).
clip_env_prompts = {
    "metaworld_sweep-into-v2": "The green cube is in the hole.", # NOTE(review): unsolved — reward issue
    "metaworld_drawer-open-v2": "The drawer is opened.", # trying the flipped phrasing
    "metaworld_door-open-v2": "The safe door is opened.", # trying the flipped phrasing
    "metaworld_soccer-v2": "The soccer ball is in the goal.", # NOTE(review): not solved — reward issue
    "CartPole-v1": "pole vertically upright on top of the cart.",
    "softgym_RopeFlattenEasy": "The blue rope is straightened.",
    "softgym_PassWater": "The container, which holds water, is as close to the red circle as possible without causing too many water droplets to spill.",
    "softgym_ClothFoldDiagonal": "The cloth is folded diagonally from top left corner to bottom right corner.",
}
# Goal phrases used by RL-VLM-F: infinitive "to ..." clauses, one per
# environment.  Every query template below is instantiated from this dict,
# so its keys define the set of supported environments.
goal_env_prompts = {
    "metaworld_sweep-into-v2": "to minimize the distance between the green cube and the hole", # NOTE(review): unsolved — reward issue
    "metaworld_drawer-open-v2": "to open the drawer", # trying the flipped phrasing
    "metaworld_door-open-v2": "to open the safe door", # trying the flipped phrasing
    "metaworld_soccer-v2": "to move the soccer ball into the goal", # NOTE(review): not solved — reward issue
    "CartPole-v1": "to balance the brown pole on the black cart to be upright",
    "softgym_RopeFlattenEasy": "to straighten the blue rope",
    "softgym_PassWater": "to move the container, which holds water, to be as close to the red circle as possible without causing too many water droplets to spill",
    "softgym_ClothFoldDiagonal": "to fold the cloth diagonally from top left corner to bottom right corner",
}
##########################################################################
### asking gemini to output a preference with 2 stage analysis ###############
#########################################################################
# Fixed text fragments interleaved with the two images when assembling a
# multimodal query: prompt1 precedes Image 1, prompt2 precedes Image 2.
# (Presumably concatenated by the caller with the per-env question text —
# the assembly itself is not visible in this file.)
gemini_free_query_prompt1 = """
Consider the following two images:
Image 1:
"""
gemini_free_query_prompt2 = """
Image 2:
"""
# Stage-1 analysis question: describe each image, then compare them with
# respect to the task goal.  The single "{}" slot receives the goal phrase.
gemini_free_query_template = """
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
"""
# Instantiate the template once per supported environment.
gemini_free_query_env_prompts = {
    name: gemini_free_query_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
### preference summary prompt
### template 1
# Stage-2 summarization: given the free-form analysis text (spliced into
# the second slot at query time), ask for a single preference label:
# 0 -> Image 1 better, 1 -> Image 2 better, -1 -> unsure / no difference.
gemini_summary_template = """
Based on the text below to the questions:
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
{}
Is the goal better achieved in Image 1 or Image 2?
Reply a single line of 0 if the goal is better achieved in Image 1, or 1 if it is better achieved in Image 2.
Reply -1 if the text is unsure or there is no difference.
"""
# Fill the goal slot now; pass a literal "{}" so the second slot survives
# for the analysis text to be formatted in later.
gemini_summary_env_prompts = {
    name: gemini_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
######################################################################
### asking gemini to output a preference with 1 stage analysis ###############
######################################################################
# Single-stage variant: analysis questions and the final 0/1/-1 preference
# request combined into one prompt.  The "{}" slot receives the goal phrase.
gemini_single_query_prompt_template = """
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
Is the goal better achieved in Image 1 or Image 2?
Reply a single line of 0 if the goal is better achieved in Image 1, or 1 if it is better achieved in Image 2.
Reply -1 if the text is unsure or there is no difference.
"""
# One fully-instantiated prompt per supported environment.
gemini_single_query_env_prompts = {
    name: gemini_single_query_prompt_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
######################################################################
### asking gemini to output a preference with only labeling ###############
######################################################################
# Label-only variant: asks directly for the 0/1/-1 preference with no
# intermediate analysis.  NOTE(review): unlike the other templates, this
# one is not instantiated per-environment anywhere in this file — callers
# presumably .format() it with the goal phrase themselves.
gemini_single_query_no_analysis_prompt_template = """
The goal is {}. Is the goal better achieved in Image 1 or Image 2?
At the end of the response, reply a single line of:
0 if the goal is better achieved in Image 1,
1 if it is better achieved in Image 2, or
-1 if there is no difference or if it is unclear.
"""
### asking gemini to output score
# Fixed fragment preceding the (single) image in a scoring query.
gemini_score_prompt_start = """
Consider the following image:
"""
# Stage-1 scoring question: describe the image and rate goal achievement
# on a 0-to-1 scale.  The "{}" slot receives the goal phrase.
gemini_score_template = """
1. What is shown in the image?
2. The goal is {}. On a scale of 0 to 1, the score is 1 if the goal is achieved. What score would you give the image in terms of achieving the goal?
"""
# Instantiate the scoring question once per supported environment.
gemini_score_env_prompts = {
    name: gemini_score_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
# Stage-2 score extraction: given the free-form scoring analysis (spliced
# into the second slot at query time), ask for the bare numeric score,
# or -1 when the analysis is unsure.
gemini_score_summary_template = """
Based on the text below to the questions:
1. What is shown in the image?
2. The goal is {}. On a scale of 0 to 1, the score is 1 if the goal is achieved. What score would you give the image in terms of achieving the goal?
{}
Please reply a single line of the score the text has given.
Reply -1 if the text is unsure.
"""
# Fill the goal slot; keep a literal "{}" for the analysis text.
gemini_score_summary_env_prompts = {
    name: gemini_score_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
### GPT uses the same prompt templates as gemini.
# Fix: the original deep-copied each template, but copy.deepcopy of an
# immutable str is a no-op (it returns the same object), so plain aliasing
# is equivalent and clearer.  Also removes a stray trailing "|" that made
# the last line a syntax error.
gpt_free_query_template = gemini_free_query_template
gpt_summary_template = gemini_summary_template
gpt_score_template = gemini_score_template
gpt_score_summary_template = gemini_score_summary_template

# Per-environment GPT prompts, mirroring the gemini dicts above; summary
# templates keep a literal "{}" slot for the analysis text.
gpt_free_query_env_prompts = {
    name: gpt_free_query_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
gpt_summary_env_prompts = {
    name: gpt_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
gpt_score_query_env_prompts = {
    name: gpt_score_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
gpt_score_summary_env_prompts = {
    name: gpt_score_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}