"""Prompt templates for querying VLMs (Gemini / GPT) for pairwise preferences and scalar scores over environment images."""
import copy
# Declarative goal-state captions, one per environment: each sentence
# describes the scene as it should look when the task is solved
# (CLIP-style image/text matching prompts).
clip_env_prompts = {
    "metaworld_sweep-into-v2": "The green cube is in the hole.", # NOTE(review): unsolved — reward issue
    "metaworld_drawer-open-v2": "The drawer is opened.", # trying the flipped phrasing
    "metaworld_door-open-v2": "The safe door is opened.", # trying the flipped phrasing
    "metaworld_soccer-v2": "The soccer ball is in the goal.", # NOTE(review): not solved — reward issue
    "CartPole-v1": "pole vertically upright on top of the cart.",
    "softgym_RopeFlattenEasy": "The blue rope is straightened.",
    "softgym_PassWater": "The container, which holds water, is as close to the red circle as possible without causing too many water droplets to spill.",
    "softgym_ClothFoldDiagonal": "The cloth is folded diagonally from top left corner to bottom right corner.",
}
# Goal phrases used by RL-VLM-F: infinitive "to ..." clauses, one per
# environment.  Every query template below is instantiated from this dict,
# so its keys define the set of supported environments.
goal_env_prompts = {
    "metaworld_sweep-into-v2": "to minimize the distance between the green cube and the hole", # NOTE(review): unsolved — reward issue
    "metaworld_drawer-open-v2": "to open the drawer", # trying the flipped phrasing
    "metaworld_door-open-v2": "to open the safe door", # trying the flipped phrasing
    "metaworld_soccer-v2": "to move the soccer ball into the goal", # NOTE(review): not solved — reward issue
    "CartPole-v1": "to balance the brown pole on the black cart to be upright",
    "softgym_RopeFlattenEasy": "to straighten the blue rope",
    "softgym_PassWater": "to move the container, which holds water, to be as close to the red circle as possible without causing too many water droplets to spill",
    "softgym_ClothFoldDiagonal": "to fold the cloth diagonally from top left corner to bottom right corner",
}
##########################################################################
### asking gemini to output a preference with 2 stage analysis ###############
#########################################################################
# Fixed text fragments interleaved with the two images when assembling a
# multimodal query: prompt1 precedes Image 1, prompt2 precedes Image 2.
# (Presumably concatenated by the caller with the per-env question text —
# the assembly itself is not visible in this file.)
gemini_free_query_prompt1 = """
Consider the following two images:
Image 1:
"""
gemini_free_query_prompt2 = """
Image 2:
"""
# Stage-1 analysis question: describe each image, then compare them with
# respect to the task goal.  The single "{}" slot receives the goal phrase.
gemini_free_query_template = """
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
"""
# Instantiate the template once per supported environment.
gemini_free_query_env_prompts = {
    name: gemini_free_query_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
### preference summary prompt
### template 1
# Stage-2 summarization: given the free-form analysis text (spliced into
# the second slot at query time), ask for a single preference label:
# 0 -> Image 1 better, 1 -> Image 2 better, -1 -> unsure / no difference.
gemini_summary_template = """
Based on the text below to the questions:
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
{}
Is the goal better achieved in Image 1 or Image 2?
Reply a single line of 0 if the goal is better achieved in Image 1, or 1 if it is better achieved in Image 2.
Reply -1 if the text is unsure or there is no difference.
"""
# Fill the goal slot now; pass a literal "{}" so the second slot survives
# for the analysis text to be formatted in later.
gemini_summary_env_prompts = {
    name: gemini_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
######################################################################
### asking gemini to output a preference with 1 stage analysis ###############
######################################################################
# Single-stage variant: analysis questions and the final 0/1/-1 preference
# request combined into one prompt.  The "{}" slot receives the goal phrase.
gemini_single_query_prompt_template = """
1. What is shown in Image 1?
2. What is shown in Image 2?
3. The goal is {}. Is there any difference between Image 1 and Image 2 in terms of achieving the goal?
Is the goal better achieved in Image 1 or Image 2?
Reply a single line of 0 if the goal is better achieved in Image 1, or 1 if it is better achieved in Image 2.
Reply -1 if the text is unsure or there is no difference.
"""
# One fully-instantiated prompt per supported environment.
gemini_single_query_env_prompts = {
    name: gemini_single_query_prompt_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
######################################################################
### asking gemini to output a preference with only labeling ###############
######################################################################
# Label-only variant: asks directly for the 0/1/-1 preference with no
# intermediate analysis.  NOTE(review): unlike the other templates, this
# one is not instantiated per-environment anywhere in this file — callers
# presumably .format() it with the goal phrase themselves.
gemini_single_query_no_analysis_prompt_template = """
The goal is {}. Is the goal better achieved in Image 1 or Image 2?
At the end of the response, reply a single line of:
0 if the goal is better achieved in Image 1,
1 if it is better achieved in Image 2, or
-1 if there is no difference or if it is unclear.
"""
### asking gemini to output score
# Fixed fragment preceding the (single) image in a scoring query.
gemini_score_prompt_start = """
Consider the following image:
"""
# Stage-1 scoring question: describe the image and rate goal achievement
# on a 0-to-1 scale.  The "{}" slot receives the goal phrase.
gemini_score_template = """
1. What is shown in the image?
2. The goal is {}. On a scale of 0 to 1, the score is 1 if the goal is achieved. What score would you give the image in terms of achieving the goal?
"""
# Instantiate the scoring question once per supported environment.
gemini_score_env_prompts = {
    name: gemini_score_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
# Stage-2 score extraction: given the free-form scoring analysis (spliced
# into the second slot at query time), ask for the bare numeric score,
# or -1 when the analysis is unsure.
gemini_score_summary_template = """
Based on the text below to the questions:
1. What is shown in the image?
2. The goal is {}. On a scale of 0 to 1, the score is 1 if the goal is achieved. What score would you give the image in terms of achieving the goal?
{}
Please reply a single line of the score the text has given.
Reply -1 if the text is unsure.
"""
# Fill the goal slot; keep a literal "{}" for the analysis text.
gemini_score_summary_env_prompts = {
    name: gemini_score_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
### GPT uses the same prompt templates as gemini.
# Fix: the original deep-copied each template, but copy.deepcopy of an
# immutable str is a no-op (it returns the same object), so plain aliasing
# is equivalent and clearer.  Also removes a stray trailing "|" that made
# the last line a syntax error.
gpt_free_query_template = gemini_free_query_template
gpt_summary_template = gemini_summary_template
gpt_score_template = gemini_score_template
gpt_score_summary_template = gemini_score_summary_template

# Per-environment GPT prompts, mirroring the gemini dicts above; summary
# templates keep a literal "{}" slot for the analysis text.
gpt_free_query_env_prompts = {
    name: gpt_free_query_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
gpt_summary_env_prompts = {
    name: gpt_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}
gpt_score_query_env_prompts = {
    name: gpt_score_template.format(goal)
    for name, goal in goal_env_prompts.items()
}
gpt_score_summary_env_prompts = {
    name: gpt_score_summary_template.format(goal, "{}")
    for name, goal in goal_env_prompts.items()
}