Spaces:

Transcrib3D
/

Transcrib3D-Demo

Sleeping

App Files Files Community

Vincent-Tann committed 26 days ago

Commit

b8de9e2

1 Parent(s): 1477c41

fix Gradio Chatbot bug; clear comments.

Browse files

Files changed (6) hide show

_appyibu.py +10 -8
app.py +12 -15
code_interpreter.py +8 -7
display_model.py +16 -16
object_filter_gpt4.py +2 -2
transcrib3d_main.py +26 -23

_appyibu.py CHANGED Viewed

@@ -13,23 +13,25 @@ new_glb_file = os.path.join("scenes", f"{scan_id}_vh_clean_2_aligned_AddBox.glb"
 objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")
 def insert_user_none_between_assistant(messages):
-    # 初始化结果列表
     result = []
-    # 初始状态设置为"user"，以确保列表第一个条目为"assistant"时能正确插入
     last_role = "user"
     for msg in messages:
-        # 检查当前信息的角色
         current_role = msg["role"]
-        # 如果上一个和当前信息均为"assistant"，插入content为None的"user"信息
         if last_role == "assistant" and current_role == "assistant":
             result.append({"role": "user", "content": None})
-        # 将当前信息添加到结果列表
         result.append(msg)
-        # 更新上一条信息的角色
         last_role = current_role
     return result
@@ -122,11 +124,11 @@ with gr.Blocks() as demo:
             )
     # print("Type2:",type(model3d))
-    # 直接在 inputs列表里写model3d，会导致实际传给callback函数的是str
     # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
     bt.click(fn=process_instruction_callback, inputs=[user_instruction_textbox, gr.State(model3d), gr.State(dialogue)])#, outputs=[model3d,dialogue])
-    # 直接用lambda函数定义一个映射
     # type(user_instruction_textbox.value)
     # user_instruction_textbox.
     # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)

 objects_info_file = os.path.join("objects_info", f"objects_info_{scan_id}.npy")
 def insert_user_none_between_assistant(messages):
+    # Initialize the result list
     result = []
+    # Set the initial state to "user" so insertion works correctly
+    # when the first item in the list is "assistant"
     last_role = "user"
     for msg in messages:
+        # Check the role of the current message
         current_role = msg["role"]
+        # If both the previous and current messages are "assistant",
+        # insert a "user" message whose content is None
         if last_role == "assistant" and current_role == "assistant":
             result.append({"role": "user", "content": None})
+        # Add the current message to the result list
         result.append(msg)
+        # Update the role of the previous message
         last_role = current_role
     return result
             )
     # print("Type2:",type(model3d))
+    # Passing model3d directly in the inputs list causes the callback to receive a string
     # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
     bt.click(fn=process_instruction_callback, inputs=[user_instruction_textbox, gr.State(model3d), gr.State(dialogue)])#, outputs=[model3d,dialogue])
+    # Define a mapping directly with a lambda function
     # type(user_instruction_textbox.value)
     # user_instruction_textbox.
     # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)

app.py CHANGED Viewed

@@ -49,23 +49,23 @@ def insert_user_blank_between_assistant(messages):
 def timer_check_update(code_interpreter, update_interval, stop_event):
     """
-    定时检查 code_interpreter.has_update 是否为True，
-    如果为True，则触发界面更新逻辑并重置状态。
-    参数：
-    - code_interpreter: CodeInterpreter的实例，预期包含has_update属性。
-    - update_interval: 定时器检查间隔，以秒为单位。
-    - stop_event: 一个threading.Event()实例，用于停止定时器线程。
     """
     while not stop_event.is_set():
         if code_interpreter.has_update:
-            # 实现更新界面显示的逻辑
             print("Detected update, trigger UI refreshing...")
-            # 在这里添加更新界面显示的代码
             # ...
-            # 重置has_update状态
             code_interpreter.has_update = False
-        # 等待下次检查
         time.sleep(update_interval)
 def process_instruction_callback(inp_api_key, instruction, llm_name):
@@ -194,13 +194,12 @@ with gr.Blocks() as demo:
             )
             # Right-5: Dialogue
             dialogue = gr.Chatbot(
-                type="messages",
                 height=470
             )
     # print("Type2:",type(model3d))
-    # 直接在 inputs列表里写model3d，会导致实际传给callback函数的是str
     # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
     bt.click(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox,llm_name_text], outputs=[model3d,dialogue])
     user_instruction_textbox.submit(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d,dialogue])
@@ -208,15 +207,13 @@ with gr.Blocks() as demo:
     scene_type_dropdown.select(fn=scene_type_dropdown_callback, inputs=scene_type_dropdown, outputs=model3d)
     llm_dropdown.select(fn=llm_dropdown_callback, inputs=llm_dropdown, outputs=llm_name_text)
-    # 直接用lambda函数定义一个映射
     # type(user_instruction_textbox.value)
     # user_instruction_textbox.
     # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
     # user_instruction_textbox.
     # bt.click(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
-# os.system('uname -a')  # 显示所有系统信息
-# demo.launch()

 def timer_check_update(code_interpreter, update_interval, stop_event):
     """
+    Periodically check whether code_interpreter.has_update is True.
+    If it is True, trigger the UI refresh logic and reset the state.
+    Args:
+    - code_interpreter: A CodeInterpreter instance expected to have a has_update attribute.
+    - update_interval: Timer polling interval in seconds.
+    - stop_event: A threading.Event() instance used to stop the timer thread.
     """
     while not stop_event.is_set():
         if code_interpreter.has_update:
+            # Implement the UI refresh logic
             print("Detected update, trigger UI refreshing...")
+            # Add UI refresh code here
             # ...
+            # Reset the has_update flag
             code_interpreter.has_update = False
+        # Wait until the next check
         time.sleep(update_interval)
 def process_instruction_callback(inp_api_key, instruction, llm_name):
             )
             # Right-5: Dialogue
             dialogue = gr.Chatbot(
                 height=470
             )
     # print("Type2:",type(model3d))
+    # Passing model3d directly in the inputs list causes the callback to receive a string
     # bt.click(fn=process_instruction_callback, inputs=user_instruction_textbox, outputs=dialogue)
     bt.click(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox,llm_name_text], outputs=[model3d,dialogue])
     user_instruction_textbox.submit(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d,dialogue])
     scene_type_dropdown.select(fn=scene_type_dropdown_callback, inputs=scene_type_dropdown, outputs=model3d)
     llm_dropdown.select(fn=llm_dropdown_callback, inputs=llm_dropdown, outputs=llm_name_text)
+    # Define a mapping directly with a lambda function
     # type(user_instruction_textbox.value)
     # user_instruction_textbox.
     # user_instruction_textbox.submit(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)
     # user_instruction_textbox.
     # bt.click(fn=lambda: process_instruction_callback(user_instruction_textbox, model3d), inputs=[], outputs=dialogue)

code_interpreter.py CHANGED Viewed

@@ -12,9 +12,10 @@ class CodeInterpreter(Dialogue):
         super().__init__(**kwargs)
     def call_openai_with_code_interpreter(self, user_prompt,namespace_for_exec={},token_usage_total=0):
-        # 如果gpt回复的内容包含python代码，则把代码的执行结果发送给gpt，继续等待其回复
-        # 如果gpt回复的内容不包含python代码，则此函数返回全部结果
-        # 每次递归统计使用的token数，最终返回总的token数
         assistant_response,token_usage = self.call_openai(user_prompt)
         token_usage_total+=token_usage
@@ -52,15 +53,15 @@ class CodeInterpreter(Dialogue):
             # f.close()
             # sys.stdout = sys.__stdout__
-            #############利用保存文件的方式####################
-            # # 将代码片段保存到 code_snippet.py 文件
             # with open("code_snippet.py", "w") as file:
             #     file.write(code_snippet)
-            # # 执行 code_snippet.py 并将输出重定向到临时文件
             # os.system("python code_snippet.py > output.txt")
-            # # 从临时文件中读取结果
             # with open("output.txt", "r") as file:
             #     code_exe_result = file.read()
             ##################################################

         super().__init__(**kwargs)
     def call_openai_with_code_interpreter(self, user_prompt,namespace_for_exec={},token_usage_total=0):
+        # If the GPT response contains Python code, execute it and send the result
+        # back to GPT, then continue waiting for its reply.
+        # If the GPT response does not contain Python code, return the full result.
+        # Accumulate token usage on each recursive call and return the total at the end.
         assistant_response,token_usage = self.call_openai(user_prompt)
         token_usage_total+=token_usage
             # f.close()
             # sys.stdout = sys.__stdout__
+            #############Using the file-saving approach####################
+            # # Save the code snippet to the code_snippet.py file
             # with open("code_snippet.py", "w") as file:
             #     file.write(code_snippet)
+            # # Execute code_snippet.py and redirect the output to a temporary file
             # os.system("python code_snippet.py > output.txt")
+            # # Read the result from the temporary file
             # with open("output.txt", "r") as file:
             #     code_exe_result = file.read()
             ##################################################

display_model.py CHANGED Viewed

@@ -62,13 +62,13 @@ def add_1box_to_ply(box, ply_file, new_ply_file, line_width=0.05, obj_id=1):
     box_vertices['blue'] = [color[2]] * 16
     box_vertices['alpha'] = [obj_id] * 16
-    # 将新的顶点数据添加到原始顶点数据后面
     updated_vertices = np.concatenate((vertices, box_vertices))
-    # 创建包含新顶点的PlyElement对象
     updated_vertex_element = PlyElement.describe(updated_vertices, 'vertex')
-    # 将更新后的PlyElement对象替换原始的顶点数据
     # ply_data['vertex'] = updated_vertex_element
     # get the number of original vertices:
@@ -108,18 +108,18 @@ def add_1box_to_ply(box, ply_file, new_ply_file, line_width=0.05, obj_id=1):
     box_faces = np.zeros(len(box_connections), dtype=faces.dtype)
     box_faces['vertex_indices'] = box_connections
-    # 将新的face数据添加到原始顶点数据后面
     updated_faces = np.concatenate((faces, box_faces))
-    # 创建包含新顶点的PlyElement对象
     updated_face_element = PlyElement.describe(updated_faces, 'face')
-    # 将更新后的PlyElement对象替换原始的顶点数据
     # ply_data['face'] = updated_face_element
     new_ply_data = PlyData([updated_vertex_element, updated_face_element])
-    # 将更新后的PlyData对象写回Ply文件
     with open(new_ply_file, 'wb') as f:
         new_ply_data.write(f)
@@ -127,36 +127,36 @@ def add_1box_to_ply(box, ply_file, new_ply_file, line_width=0.05, obj_id=1):
 def ply_to_obj(ply_file, obj_file, mtl_file):
-    # 读取PLY文件
     with open(ply_file, 'rb') as f:
         plydata = PlyData.read(f)
-        # 获取顶点和面数据
         vertices = np.vstack([plydata['vertex'][prop] for prop in ['x', 'y', 'z']]).T
         colors = np.vstack([plydata['vertex'][prop] for prop in ['red', 'green', 'blue', 'alpha']]).T/255.0
         faces = plydata['face']['vertex_indices']
-    # 写入OBJ文件
     with open(obj_file, 'w') as f:
-        # 写入依赖的mtl文件（颜色）
         f.write("mtllib %s\n"%mtl_file.split('/')[-1])
-        # 写入顶点信息
         for vertex in vertices:
             f.write(f"v {' '.join(map(str, vertex))}\n")
-        # 写入颜色信息
         for idx in range(len(vertices)):
             f.write("usemtl mat%d\n"%(idx+1))
-        # 写入面信息
         for face in faces:
             f.write("f")
             for vertex_index in face:
-                f.write(f" {vertex_index + 1}")  # OBJ文件索引从1开始
             f.write("\n")
-    # 写入mtl文件
     with open(mtl_file, 'w') as f:
         for idx, color in enumerate(colors):
             f.write("newmtl mat%d\n" % (idx+1))

     box_vertices['blue'] = [color[2]] * 16
     box_vertices['alpha'] = [obj_id] * 16
+    # Append the new vertex data after the original vertex data
     updated_vertices = np.concatenate((vertices, box_vertices))
+    # Create a PlyElement object containing the new vertices
     updated_vertex_element = PlyElement.describe(updated_vertices, 'vertex')
+    # Replace the original vertex data with the updated PlyElement object
     # ply_data['vertex'] = updated_vertex_element
     # get the number of original vertices:
     box_faces = np.zeros(len(box_connections), dtype=faces.dtype)
     box_faces['vertex_indices'] = box_connections
+    # Append the new face data after the original face data
     updated_faces = np.concatenate((faces, box_faces))
+    # Create a PlyElement object containing the new faces
     updated_face_element = PlyElement.describe(updated_faces, 'face')
+    # Replace the original face data with the updated PlyElement object
     # ply_data['face'] = updated_face_element
     new_ply_data = PlyData([updated_vertex_element, updated_face_element])
+    # Write the updated PlyData object back to the PLY file
     with open(new_ply_file, 'wb') as f:
         new_ply_data.write(f)
 def ply_to_obj(ply_file, obj_file, mtl_file):
+    # Read the PLY file
     with open(ply_file, 'rb') as f:
         plydata = PlyData.read(f)
+        # Get the vertex and face data
         vertices = np.vstack([plydata['vertex'][prop] for prop in ['x', 'y', 'z']]).T
         colors = np.vstack([plydata['vertex'][prop] for prop in ['red', 'green', 'blue', 'alpha']]).T/255.0
         faces = plydata['face']['vertex_indices']
+    # Write the OBJ file
     with open(obj_file, 'w') as f:
+        # Write the referenced MTL file (material colors)
         f.write("mtllib %s\n"%mtl_file.split('/')[-1])
+        # Write the vertex data
         for vertex in vertices:
             f.write(f"v {' '.join(map(str, vertex))}\n")
+        # Write the material assignments
         for idx in range(len(vertices)):
             f.write("usemtl mat%d\n"%(idx+1))
+        # Write the face data
         for face in faces:
             f.write("f")
             for vertex_index in face:
+                f.write(f" {vertex_index + 1}")  # OBJ indices start from 1
             f.write("\n")
+    # Write the MTL file
     with open(mtl_file, 'w') as f:
         for idx, color in enumerate(colors):
             f.write("newmtl mat%d\n" % (idx+1))

object_filter_gpt4.py CHANGED Viewed

@@ -40,7 +40,7 @@ class ObjectFilter(Dialogue):
         super().__init__(**config)
     def extract_all_int_lists_from_text(self,text) ->list:
-        # 匹配方括号内的内容
         pattern = r'\[([^\[\]]+)\]'
         matches = re.findall(pattern, text)
@@ -134,7 +134,7 @@ if __name__ == "__main__":
         scanrefer_data=json.load(json_file)
     from datetime import datetime
-    # 记录时间作为文件名
     current_time = datetime.now()
     formatted_time = current_time.strftime("%Y-%m-%d-%H-%M-%S")
     print("formatted_time:",formatted_time)

         super().__init__(**config)
     def extract_all_int_lists_from_text(self,text) ->list:
+        # Match the content inside square brackets
         pattern = r'\[([^\[\]]+)\]'
         matches = re.findall(pattern, text)
         scanrefer_data=json.load(json_file)
     from datetime import datetime
+    # Record the current time to use as the folder name
     current_time = datetime.now()
     formatted_time = current_time.strftime("%Y-%m-%d-%H-%M-%S")
     print("formatted_time:",formatted_time)

transcrib3d_main.py CHANGED Viewed

@@ -98,10 +98,10 @@ def gen_prompt(user_instruction, scan_id):
     objects_related = objects_info
-    # 获取场景的中心坐标
     # scene_center=get_scene_center(objects_related)
-    scene_center = get_scene_center(objects_info)  # 注意这里应该用所有物体的信息，而不只是relevant
-    # 生成prompt中的背景信息部分
     prompt = scan_id + ":objects with quantitative description based on right-hand Cartesian coordinate system with x-y-z axes, x-y plane=ground, z-axis=up/down. Coords format [x, y, z].\n\n"
     # if dataset == 'nr3d':
     #     prompt = prompt + "Scene center:%s. If no direction vector, observer at center for objorientation.\n" % remove_spaces(str(scene_center))
@@ -114,57 +114,58 @@ def gen_prompt(user_instruction, scan_id):
     prompt = prompt + "Scene center:%s. If no direction vector, observer at center for obj orientation.\n\n" % remove_spaces(str(scene_center))
     prompt = prompt + "objs list:\n"
     lines = []
-    # 生成prompt中对物体的定量描述部分（遍历所有相关物体）
     for obj in objects_related:
-        # 位置信息，保留2位小数
         center_position = obj['center_position']
         center_position = round_list(center_position, 2)
-        # size信息，保留2位小数
         size = obj['size']
         size = round_list(size, 2)
-        # extension信息，保留2位小数
         extension = obj['extension']
         extension = round_list(extension, 2)
-        # 方向信息，用方向向量表示. 注意，scanrefer由于用的不是scannet原始obj id，所以不能用方向信息
         if obj['has_front']:
             front_point = np.array(obj['front_point'])
             center = np.array(obj['obb'][0:3])
             direction_vector = front_point - center
             direction_vector_normalized = direction_vector / np.linalg.norm(direction_vector)
-            # 再计算左和右的方向向量，全部保留两位小数
             front_vector = round_list(direction_vector_normalized, 2)
             up_vector = np.array([0, 0, 1])
             left_vector = round_list(np.cross(direction_vector_normalized, up_vector), 2)
             right_vector = round_list(np.cross(up_vector, direction_vector_normalized), 2)
             behind_vector = round_list(-np.array(front_vector), 2)
-            # 生成方向信息
             direction_info = ";direction vectors:front=%s,left=%s,right=%s,behind=%s\n" %(front_vector, left_vector, right_vector, behind_vector)
             #
         else:
-            direction_info = "\n"  # 未知方向向量就啥都不写
-        # sr3d，给出center、size
         # if dataset == 'sr3d':
         if False:
             line = f'{obj["label"]},id={obj["id"]},ctr={remove_spaces(str(center_position))},size={remove_spaces(str(size))}'
-        # nr3d和scanrefer，给出center、size、color
         else:
             rgb = obj['avg_rgba'][0:3]
             hsl = round_list(rgb_to_hsl(rgb), 2)
-            # line="%s,id=%s,ctr=%s,size=%s,RGB=%s" %(obj['label'], obj['id'], self.remove_space(str(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(rgb) )) 原版rgb
-            line="%s,id=%s,ctr=%s,size=%s,HSL=%s" %(obj['label'], obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(hsl)))#rgb换成hsl
-            # line = "%s(relevant to %s),id=%s,ctr=%s,size=%s,HSL=%s" % (obj['label'],id_to_name_in_description[obj['id']], obj['id'], self.remove_spaces(st(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(hsl)))  # 格式：name=原名称(description里的名称)
             # if id_to_name_in_description[obj['id']]=='room':
             #     name=obj['label']
             # else:
             #     name=id_to_name_in_description[obj['id']]
-            # line="%s,id=%s,ctr=%s,size=%s,HSL=%s" %(name, obj['id'], self.remove_spaces(st(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(hsl) )) # 式：name=description里的名称
         lines.append(line + direction_info)
     # if self.obj_info_ablation_type == 4:
     #     random.seed(0)
     #     random.shuffle(lines)
     prompt += ''.join(lines)
-    # prompt中的要求
     line = "\nInstruction:find the one described object in description: \n\"%s\"\n" % user_instruction
     prompt = prompt + line
@@ -208,21 +209,23 @@ def get_gpt_response(prompt: str, code_interpreter: CodeInterpreter):
     return response
 def extract_answer_id_from_last_line(last_line, random_choice_list=[0,]):
-    # 如果没有按照预期格式回复则随机选取(Sr3d)或直接选成0(Nr3d和Scanrefer);按预期格式恢复则提取答案
     wrong_return_format = False
     last_line_split = last_line.split('--')
-    # 使用正则表达式从字符串中提取字典部分
     pattern = r"\{[^\}]*\}"
     match = re.search(pattern, last_line_split[-1])
     if match:
-        # 获取匹配的字典字符串
         matched_dict_str = match.group()
         try:
-            # 解析字典字符串为字典对象
             extracted_dict = ast.literal_eval(matched_dict_str)
             print(extracted_dict)
             answer_id = extracted_dict['ID']
-            # 如果确实以 Now the answer is complete -- {'ID': xxx} 的格式回复了，但是xxx不是数字（例如是None），也能随机选。
             if not isinstance(answer_id, int):
                 if isinstance(answer_id, list) and all([isinstance(e, int) for e in answer_id]):
                     print("Wrong answer format: %s. random choice from this list" % str(answer_id))

     objects_related = objects_info
+    # Get the center coordinates of the scene
     # scene_center=get_scene_center(objects_related)
+    scene_center = get_scene_center(objects_info)  # Note: all object information should be used here, not just the relevant ones
+    # Generate the background information section of the prompt
     prompt = scan_id + ":objects with quantitative description based on right-hand Cartesian coordinate system with x-y-z axes, x-y plane=ground, z-axis=up/down. Coords format [x, y, z].\n\n"
     # if dataset == 'nr3d':
     #     prompt = prompt + "Scene center:%s. If no direction vector, observer at center for objorientation.\n" % remove_spaces(str(scene_center))
     prompt = prompt + "Scene center:%s. If no direction vector, observer at center for obj orientation.\n\n" % remove_spaces(str(scene_center))
     prompt = prompt + "objs list:\n"
     lines = []
+    # Generate the quantitative object descriptions in the prompt (iterate over all relevant objects)
     for obj in objects_related:
+        # Position information, rounded to 2 decimal places
         center_position = obj['center_position']
         center_position = round_list(center_position, 2)
+        # Size information, rounded to 2 decimal places
         size = obj['size']
         size = round_list(size, 2)
+        # Extension information, rounded to 2 decimal places
         extension = obj['extension']
         extension = round_list(extension, 2)
+        # Direction information represented by direction vectors.
+        # Note: ScanRefer does not use the original ScanNet object IDs, so direction information cannot be used.
         if obj['has_front']:
             front_point = np.array(obj['front_point'])
             center = np.array(obj['obb'][0:3])
             direction_vector = front_point - center
             direction_vector_normalized = direction_vector / np.linalg.norm(direction_vector)
+            # Compute the left and right direction vectors as well, all rounded to 2 decimal places
             front_vector = round_list(direction_vector_normalized, 2)
             up_vector = np.array([0, 0, 1])
             left_vector = round_list(np.cross(direction_vector_normalized, up_vector), 2)
             right_vector = round_list(np.cross(up_vector, direction_vector_normalized), 2)
             behind_vector = round_list(-np.array(front_vector), 2)
+            # Generate the direction information
             direction_info = ";direction vectors:front=%s,left=%s,right=%s,behind=%s\n" %(front_vector, left_vector, right_vector, behind_vector)
             #
         else:
+            direction_info = "\n"  # If the direction vector is unknown, leave this blank
+        # For sr3d, provide center and size
         # if dataset == 'sr3d':
         if False:
             line = f'{obj["label"]},id={obj["id"]},ctr={remove_spaces(str(center_position))},size={remove_spaces(str(size))}'
+        # For nr3d and ScanRefer, provide center, size, and color
         else:
             rgb = obj['avg_rgba'][0:3]
             hsl = round_list(rgb_to_hsl(rgb), 2)
+            # line="%s,id=%s,ctr=%s,size=%s,RGB=%s" %(obj['label'], obj['id'], self.remove_space(str(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(rgb) )) original RGB version
+            line="%s,id=%s,ctr=%s,size=%s,HSL=%s" %(obj['label'], obj['id'], remove_spaces(str(center_position)), remove_spaces(str(size)), remove_spaces(str(hsl)))#switched from RGB to HSL
+            # line = "%s(relevant to %s),id=%s,ctr=%s,size=%s,HSL=%s" % (obj['label'],id_to_name_in_description[obj['id']], obj['id'], self.remove_spaces(st(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(hsl)))  # Format: name=original name (the name used in the description)
             # if id_to_name_in_description[obj['id']]=='room':
             #     name=obj['label']
             # else:
             #     name=id_to_name_in_description[obj['id']]
+            # line="%s,id=%s,ctr=%s,size=%s,HSL=%s" %(name, obj['id'], self.remove_spaces(st(center_position)), self.remove_spaces(str(size)), self.remove_spaces(str(hsl) )) # Format: name=the name used in the description
         lines.append(line + direction_info)
     # if self.obj_info_ablation_type == 4:
     #     random.seed(0)
     #     random.shuffle(lines)
     prompt += ''.join(lines)
+    # Requirements in the prompt
     line = "\nInstruction:find the one described object in description: \n\"%s\"\n" % user_instruction
     prompt = prompt + line
     return response
 def extract_answer_id_from_last_line(last_line, random_choice_list=[0,]):
+    # If the reply does not follow the expected format, choose randomly (Sr3d) or default to 0 (Nr3d and ScanRefer);
+    # otherwise, extract the answer from the expected format.
     wrong_return_format = False
     last_line_split = last_line.split('--')
+    # Use a regular expression to extract the dictionary portion from the string
     pattern = r"\{[^\}]*\}"
     match = re.search(pattern, last_line_split[-1])
     if match:
+        # Get the matched dictionary string
         matched_dict_str = match.group()
         try:
+            # Parse the dictionary string into a dictionary object
             extracted_dict = ast.literal_eval(matched_dict_str)
             print(extracted_dict)
             answer_id = extracted_dict['ID']
+            # If the response does follow the expected format but xxx is not a number
+            # (for example, None), still fall back to a random choice.
             if not isinstance(answer_id, int):
                 if isinstance(answer_id, list) and all([isinstance(e, int) for e in answer_id]):
                     print("Wrong answer format: %s. random choice from this list" % str(answer_id))