HongzeFu committed on
Commit
41a86ed
·
1 Parent(s): 3b27796

unified system log

Browse files
gradio-web/config.py CHANGED
@@ -66,12 +66,13 @@ UI_TEXT = {
66
  "action_selection_prompt": "Please select the action.\nActions with 🎯 need to select a point on the image as input",
67
  "point_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
68
  "point_selected_message": "Select: {label} | point <{x}, {y}>",
69
- "execute_action_prompt": "Executing Action {label}",
 
70
  "demo_video_prompt": 'Press "Watch Video Input 🎬" to watch a video\nNote: you can only watch the video once',
71
  "session_error": "Session expired. Please refresh the page and try again.",
72
  "reference_action_error": "Ground Truth Action Error: {error}",
73
- "reference_action_message": "Ground Truth Action: {option_label}. {option_action}",
74
- "reference_action_message_with_coords": "Ground Truth Action: {option_label}. {option_action} | coords: {coords_text}",
75
  "reference_action_status": "Ground Truth Action: {message}",
76
  "execute_missing_action": "Error: No action selected",
77
  "episode_success_banner": "********************************\n**** episode success ****\n********************************\n ---please press change episode---- ",
 
66
  "action_selection_prompt": "Please select the action.\nActions with 🎯 need to select a point on the image as input",
67
  "point_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
68
  "point_selected_message": "Select: {label} | point <{x}, {y}>",
69
+ "execute_action_prompt": "Executing: {label}",
70
+ "execute_action_prompt_with_coords": "Executing: {label} | point <{coords_text}>",
71
  "demo_video_prompt": 'Press "Watch Video Input 🎬" to watch a video\nNote: you can only watch the video once',
72
  "session_error": "Session expired. Please refresh the page and try again.",
73
  "reference_action_error": "Ground Truth Action Error: {error}",
74
+ "reference_action_message": "Ground Truth Action: {option_label}",
75
+ "reference_action_message_with_coords": "Ground Truth Action: {option_label} | point <{coords_text}>",
76
  "reference_action_status": "Ground Truth Action: {message}",
77
  "execute_missing_action": "Error: No action selected",
78
  "episode_success_banner": "********************************\n**** episode success ****\n********************************\n ---please press change episode---- ",
gradio-web/gradio_callbacks.py CHANGED
@@ -82,10 +82,71 @@ def _get_raw_option_label(session, option_idx):
82
  return label or None
83
 
84
 
85
- def _execution_video_log(session, option_idx, fallback_status=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  label = _get_raw_option_label(session, option_idx)
87
- if label:
88
- return format_log_markdown(_ui_text("log", "execute_action_prompt", label=label))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  if fallback_status is None:
90
  return None
91
  return format_log_markdown(fallback_status)
@@ -101,11 +162,11 @@ def _default_post_execute_log_state():
101
 
102
 
103
  def _point_selected_log(session, option_value, x, y):
104
- label = _get_raw_option_label(session, option_value)
105
- if label:
106
- display_label = _format_choice_prefix(label)
107
- else:
108
- display_label = str(option_value).strip() if option_value is not None else "?"
109
  return format_log_markdown(
110
  _ui_text("log", "point_selected_message", label=display_label, x=int(x), y=int(y))
111
  )
@@ -931,24 +992,7 @@ def on_map_click(uid, option_value, evt: gr.SelectData):
931
 
932
 
933
  def _is_valid_coords_text(coords_text: str) -> bool:
934
- if not isinstance(coords_text, str):
935
- return False
936
- text = coords_text.strip()
937
- if text in {
938
- "",
939
- _ui_text("coords", "select_point"),
940
- _ui_text("coords", "not_needed"),
941
- }:
942
- return False
943
- if "," not in text:
944
- return False
945
- try:
946
- x_raw, y_raw = text.split(",", 1)
947
- int(x_raw.strip())
948
- int(y_raw.strip())
949
- except Exception:
950
- return False
951
- return True
952
 
953
 
954
  def on_option_select(
@@ -1096,33 +1140,29 @@ def on_reference_action(uid, current_option_value=None):
1096
 
1097
  option_idx = reference.get("option_idx")
1098
  current_option_idx = _parse_option_idx(current_option_value)
1099
- option_label = str(reference.get("option_label", "")).strip()
1100
- option_action = str(reference.get("option_action", "")).strip()
1101
- option_action = get_ui_action_text(getattr(session, "env_id", None), option_action)
 
 
1102
  need_coords = bool(reference.get("need_coords", False))
1103
  coords_xy = reference.get("coords_xy")
1104
  suppress_next_option_change = option_idx != current_option_idx
1105
 
1106
  updated_img = current_img
1107
  coords_text = _ui_text("coords", "not_needed")
1108
- log_text = _ui_text(
1109
- "log",
1110
- "reference_action_message",
1111
- option_label=option_label,
1112
- option_action=option_action,
1113
- ).strip()
1114
 
1115
  if need_coords and isinstance(coords_xy, (list, tuple)) and len(coords_xy) >= 2:
1116
  x = int(coords_xy[0])
1117
  y = int(coords_xy[1])
1118
  updated_img = draw_marker(current_img, x, y)
1119
- coords_text = f"{x}, {y}"
1120
- log_text = _ui_text(
1121
- "log",
1122
- "reference_action_message_with_coords",
1123
- option_label=option_label,
1124
- option_action=option_action,
1125
- coords_text=coords_text,
1126
  )
1127
  LOGGER.debug(
1128
  "on_reference_action resolved uid=%s option_idx=%s need_coords=%s coords=%s",
@@ -1136,7 +1176,7 @@ def on_reference_action(uid, current_option_value=None):
1136
  _live_obs_update(value=updated_img, interactive=False),
1137
  gr.update(value=option_idx),
1138
  coords_text,
1139
- format_log_markdown(log_text),
1140
  suppress_next_option_change,
1141
  )
1142
 
@@ -1363,13 +1403,7 @@ def execute_step(uid, option_idx, coords_str):
1363
  )
1364
 
1365
  # Parse coords
1366
- click_coords = None
1367
- if coords_str and "," in coords_str:
1368
- try:
1369
- parts = coords_str.split(",")
1370
- click_coords = (int(parts[0].strip()), int(parts[1].strip()))
1371
- except:
1372
- pass
1373
 
1374
  # Execute
1375
  # 如果达到 execute 次数限制,模拟失败状态(使用和任务失败一样的机制)
@@ -1517,7 +1551,10 @@ def execute_step(uid, option_idx, coords_str):
1517
  # 格式化日志消息为 HTML 格式(支持颜色显示)
1518
  formatted_status = format_log_markdown(status)
1519
  if show_execution_video and not done:
1520
- formatted_status = _execution_video_log(session, option_idx, fallback_status=status) or formatted_status
 
 
 
1521
  post_execute_log_state = {
1522
  "preserve_terminal_log": False,
1523
  "terminal_log_value": None,
 
82
  return label or None
83
 
84
 
85
+ def _choice_label_token(text):
86
+ if not isinstance(text, str):
87
+ return None
88
+
89
+ stripped = text.strip()
90
+ if not stripped:
91
+ return None
92
+
93
+ prefix, dot, _rest = stripped.partition(".")
94
+ if dot and prefix.isalpha() and len(prefix) <= 4:
95
+ return prefix.upper()
96
+ if stripped.isalpha() and len(stripped) <= 4:
97
+ return stripped.upper()
98
+ return stripped
99
+
100
+
101
def _display_choice_label(session, option_idx=None, fallback_label=None):
    """Return the display token for an option, preferring the session's label.

    The raw label looked up via ``_get_raw_option_label`` is used when it is
    not ``None``; otherwise ``fallback_label`` is used. Either way the chosen
    value is passed through ``_choice_label_token``.
    """
    raw_label = _get_raw_option_label(session, option_idx)
    chosen = fallback_label if raw_label is None else raw_label
    return _choice_label_token(chosen)
106
+
107
+
108
def _parse_coords_text(coords_text):
    """Parse an ``"x, y"`` string into an ``(x, y)`` tuple of ints.

    Args:
        coords_text: Value to parse; anything that is not a ``str`` is rejected.

    Returns:
        ``(int, int)`` on success, or ``None`` when the input is not a string,
        is blank, equals one of the ``coords`` UI texts (``select_point`` /
        ``not_needed``), has no comma, or either side of the first comma is
        not an integer literal.
    """
    if not isinstance(coords_text, str):
        return None

    text = coords_text.strip()
    # presumably these are the placeholder texts shown in the coords box when
    # no point has been chosen -- verify against the UI config
    placeholders = {
        "",
        _ui_text("coords", "select_point"),
        _ui_text("coords", "not_needed"),
    }
    if text in placeholders or "," not in text:
        return None

    # A comma is guaranteed here, so a single split always yields two parts.
    x_raw, y_raw = text.split(",", 1)
    try:
        return int(x_raw.strip()), int(y_raw.strip())
    except ValueError:
        # int() on a str only fails with ValueError; catching just that keeps
        # unrelated errors from being silently swallowed.
        return None
127
+
128
+
129
+ def _coords_text(x, y):
130
+ return f"{int(x)}, {int(y)}"
131
+
132
+
133
def _format_action_log(message_key, label_key, label, coords=None):
    """Build the formatted log line for an action, optionally with a point.

    Args:
        message_key: Key of the ``log`` UI text to use; ``"_with_coords"`` is
            appended when usable coordinates are supplied.
        label_key: Name of the template field that receives the label token.
        label: Raw label, reduced with ``_choice_label_token``.
        coords: Optional list/tuple whose first two items are the point.

    Returns:
        The output of ``format_log_markdown``, or ``None`` when the label
        reduces to nothing.
    """
    token = _choice_label_token(label)
    if not token:
        return None

    fields = {label_key: token}
    text_key = message_key
    if isinstance(coords, (list, tuple)) and len(coords) >= 2:
        fields["coords_text"] = _coords_text(coords[0], coords[1])
        text_key = f"{message_key}_with_coords"
    return format_log_markdown(_ui_text("log", text_key, **fields))
143
+
144
+
145
def _execution_video_log(session, option_idx, coords=None, fallback_status=None):
    """Return the log line to show alongside an execution video.

    Uses the ``execute_action_prompt`` text (with the point when ``coords`` is
    given) if the option has a usable label. Otherwise falls back to the
    formatted ``fallback_status``, or ``None`` when no fallback was supplied.
    """
    rendered = _format_action_log(
        "execute_action_prompt",
        "label",
        _display_choice_label(session, option_idx),
        coords=coords,
    )
    if rendered is not None:
        return rendered
    return None if fallback_status is None else format_log_markdown(fallback_status)
 
162
 
163
 
164
  def _point_selected_log(session, option_value, x, y):
165
+ display_label = _display_choice_label(
166
+ session,
167
+ option_value,
168
+ fallback_label=str(option_value).strip() if option_value is not None else "?",
169
+ )
170
  return format_log_markdown(
171
  _ui_text("log", "point_selected_message", label=display_label, x=int(x), y=int(y))
172
  )
 
992
 
993
 
994
  def _is_valid_coords_text(coords_text: str) -> bool:
995
+ return _parse_coords_text(coords_text) is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
996
 
997
 
998
  def on_option_select(
 
1140
 
1141
  option_idx = reference.get("option_idx")
1142
  current_option_idx = _parse_option_idx(current_option_value)
1143
+ option_label = _display_choice_label(
1144
+ session,
1145
+ option_idx,
1146
+ fallback_label=str(reference.get("option_label", "")).strip(),
1147
+ )
1148
  need_coords = bool(reference.get("need_coords", False))
1149
  coords_xy = reference.get("coords_xy")
1150
  suppress_next_option_change = option_idx != current_option_idx
1151
 
1152
  updated_img = current_img
1153
  coords_text = _ui_text("coords", "not_needed")
1154
+ log_text = _format_action_log("reference_action_message", "option_label", option_label)
 
 
 
 
 
1155
 
1156
  if need_coords and isinstance(coords_xy, (list, tuple)) and len(coords_xy) >= 2:
1157
  x = int(coords_xy[0])
1158
  y = int(coords_xy[1])
1159
  updated_img = draw_marker(current_img, x, y)
1160
+ coords_text = _coords_text(x, y)
1161
+ log_text = _format_action_log(
1162
+ "reference_action_message",
1163
+ "option_label",
1164
+ option_label,
1165
+ coords=(x, y),
 
1166
  )
1167
  LOGGER.debug(
1168
  "on_reference_action resolved uid=%s option_idx=%s need_coords=%s coords=%s",
 
1176
  _live_obs_update(value=updated_img, interactive=False),
1177
  gr.update(value=option_idx),
1178
  coords_text,
1179
+ log_text,
1180
  suppress_next_option_change,
1181
  )
1182
 
 
1403
  )
1404
 
1405
  # Parse coords
1406
+ click_coords = _parse_coords_text(coords_str)
 
 
 
 
 
 
1407
 
1408
  # Execute
1409
  # 如果达到 execute 次数限制,模拟失败状态(使用和任务失败一样的机制)
 
1551
  # 格式化日志消息为 HTML 格式(支持颜色显示)
1552
  formatted_status = format_log_markdown(status)
1553
  if show_execution_video and not done:
1554
+ formatted_status = (
1555
+ _execution_video_log(session, option_idx, coords=click_coords, fallback_status=status)
1556
+ or formatted_status
1557
+ )
1558
  post_execute_log_state = {
1559
  "preserve_terminal_log": False,
1560
  "terminal_log_value": None,
gradio-web/test/test_live_obs_refresh.py CHANGED
@@ -63,7 +63,7 @@ def test_execute_step_builds_video_from_last_execution_frames(monkeypatch, reloa
63
  assert result[11]["value"] is None
64
  assert result[11]["interactive"] is False
65
  assert result[14]["interactive"] is False
66
- expected_log = callbacks.UI_TEXT["log"]["execute_action_prompt"].format(label="a")
67
  assert result[1] == expected_log
68
  assert result[15] == {
69
  "exec_btn_interactive": True,
@@ -78,6 +78,37 @@ def test_execute_step_builds_video_from_last_execution_frames(monkeypatch, reloa
78
  assert result[17] == "execution_video"
79
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def test_execute_step_falls_back_to_single_frame_clip_when_no_new_frames(monkeypatch, reload_module):
82
  callbacks = reload_module("gradio_callbacks")
83
 
 
63
  assert result[11]["value"] is None
64
  assert result[11]["interactive"] is False
65
  assert result[14]["interactive"] is False
66
+ expected_log = callbacks.UI_TEXT["log"]["execute_action_prompt"].format(label="A")
67
  assert result[1] == expected_log
68
  assert result[15] == {
69
  "exec_btn_interactive": True,
 
78
  assert result[17] == "execution_video"
79
 
80
 
81
def test_execute_step_execution_log_includes_point_when_coords_selected(monkeypatch, reload_module):
    """Executing with a selected point logs the upper-cased label plus that point."""
    callbacks = reload_module("gradio_callbacks")
    expected_log = "Executing: B | point <12, 34>"

    gray_frame = np.full((8, 8, 3), 44, dtype=np.uint8)
    fake_session = _FakeSession()
    fake_session.raw_solve_options = [{"label": "b", "available": [object()]}]
    fake_session.base_frames = [gray_frame]

    seen = {}

    def _execute_action(_option_idx, coords):
        # Record the coordinates execute_step passed after parsing the string.
        seen["coords"] = coords
        fake_session.last_execution_frames = [gray_frame]
        return "IMG", "Executing: pick", False

    fake_session.execute_action = _execute_action

    stubs = (
        (callbacks, "get_session", lambda uid: fake_session),
        (callbacks, "increment_execute_count", lambda uid, env_id, episode_idx: 1),
        (callbacks, "concatenate_frames_horizontally", lambda frames, env_id=None: list(frames)),
        (callbacks, "save_video", lambda frames, suffix="": "/tmp/exec-point.mp4"),
        (callbacks.os.path, "exists", lambda path: True),
        (callbacks.os.path, "getsize", lambda path: 10),
    )
    for target, attr_name, replacement in stubs:
        monkeypatch.setattr(target, attr_name, replacement)

    result = callbacks.execute_step("uid-1", 0, "12, 34")

    assert seen["coords"] == (12, 34)
    assert result[1] == expected_log
    assert result[16]["execute_video_log_value"] == expected_log
110
+
111
+
112
  def test_execute_step_falls_back_to_single_frame_clip_when_no_new_frames(monkeypatch, reload_module):
113
  callbacks = reload_module("gradio_callbacks")
114
 
gradio-web/test/test_reference_action_callbacks.py CHANGED
@@ -57,8 +57,7 @@ def test_on_reference_action_success_updates_option_and_coords(monkeypatch, relo
57
  assert coords_text == "5, 6"
58
  assert suppress_flag is True
59
  expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
60
- option_label="c",
61
- option_action="press the button",
62
  coords_text="5, 6",
63
  )
64
  assert log_html == expected_log
@@ -189,7 +188,6 @@ def test_on_reference_action_uses_configured_action_text_override(monkeypatch, r
189
 
190
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
191
  assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
192
- option_label="a",
193
- option_action="move forward↓",
194
  )
195
  assert suppress_flag is True
 
57
  assert coords_text == "5, 6"
58
  assert suppress_flag is True
59
  expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
60
+ option_label="C",
 
61
  coords_text="5, 6",
62
  )
63
  assert log_html == expected_log
 
188
 
189
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
190
  assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
191
+ option_label="A",
 
192
  )
193
  assert suppress_flag is True
gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED
@@ -1855,8 +1855,7 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
1855
  page.wait_for_selector("#reference_action_btn button, button#reference_action_btn", timeout=15000)
1856
 
1857
  expected_reference_log = config_module.UI_TEXT["log"]["reference_action_message_with_coords"].format(
1858
- option_label="a",
1859
- option_action="pick the left cube",
1860
  coords_text="5, 6",
1861
  )
1862
  page.locator("#reference_action_btn button, button#reference_action_btn").first.click()
@@ -2817,6 +2816,8 @@ def _run_local_execute_video_transition_test(
2817
  arg={"label": "B"},
2818
  timeout=5000,
2819
  )
 
 
2820
  page.locator("#exec_btn button, button#exec_btn").first.click()
2821
  page.wait_for_selector("#execute_video video", timeout=5000)
2822
  page.wait_for_function(
@@ -2842,7 +2843,10 @@ def _run_local_execute_video_transition_test(
2842
  )
2843
  if not done:
2844
  execution_log = _read_log_output_value(page)
2845
- assert execution_log == config_module.UI_TEXT["log"]["execute_action_prompt"].format(label="b")
 
 
 
2846
  assert execution_log != config_module.UI_TEXT["log"]["point_selection_prompt"]
2847
  controls_after_execute = _read_demo_video_controls(page, elem_id="execute_video", button_elem_id=None)
2848
  assert controls_after_execute["autoplay"] is True
 
1855
  page.wait_for_selector("#reference_action_btn button, button#reference_action_btn", timeout=15000)
1856
 
1857
  expected_reference_log = config_module.UI_TEXT["log"]["reference_action_message_with_coords"].format(
1858
+ option_label="A",
 
1859
  coords_text="5, 6",
1860
  )
1861
  page.locator("#reference_action_btn button, button#reference_action_btn").first.click()
 
2816
  arg={"label": "B"},
2817
  timeout=5000,
2818
  )
2819
+ selected_coords = _read_coords_box_value(page)
2820
+ assert selected_coords is not None
2821
  page.locator("#exec_btn button, button#exec_btn").first.click()
2822
  page.wait_for_selector("#execute_video video", timeout=5000)
2823
  page.wait_for_function(
 
2843
  )
2844
  if not done:
2845
  execution_log = _read_log_output_value(page)
2846
+ assert execution_log == config_module.UI_TEXT["log"]["execute_action_prompt_with_coords"].format(
2847
+ label="B",
2848
+ coords_text=selected_coords,
2849
+ )
2850
  assert execution_log != config_module.UI_TEXT["log"]["point_selection_prompt"]
2851
  controls_after_execute = _read_demo_video_controls(page, elem_id="execute_video", button_elem_id=None)
2852
  assert controls_after_execute["autoplay"] is True
gradio-web/test/test_ui_text_config.py CHANGED
@@ -141,7 +141,7 @@ def test_on_execute_video_end_transition_clears_execution_video_log_state(reload
141
  "preserve_terminal_log": False,
142
  "terminal_log_value": None,
143
  "preserve_execute_video_log": True,
144
- "execute_video_log_value": "Executing Action b",
145
  },
146
  )
147
 
@@ -222,19 +222,19 @@ def test_on_option_select_preserves_execution_video_log_state(reload_module):
222
  "preserve_terminal_log": False,
223
  "terminal_log_value": None,
224
  "preserve_execute_video_log": True,
225
- "execute_video_log_value": "Executing Action b",
226
  },
227
  )
228
 
229
  assert coords_update.get("__type__") == "update"
230
  assert img_update.get("__type__") == "update"
231
- assert log_update["value"] == "Executing Action b"
232
  assert suppress_flag is False
233
  assert log_state == {
234
  "preserve_terminal_log": False,
235
  "terminal_log_value": None,
236
  "preserve_execute_video_log": True,
237
- "execute_video_log_value": "Executing Action b",
238
  }
239
 
240
 
 
141
  "preserve_terminal_log": False,
142
  "terminal_log_value": None,
143
  "preserve_execute_video_log": True,
144
+ "execute_video_log_value": "Executing: B",
145
  },
146
  )
147
 
 
222
  "preserve_terminal_log": False,
223
  "terminal_log_value": None,
224
  "preserve_execute_video_log": True,
225
+ "execute_video_log_value": "Executing: B",
226
  },
227
  )
228
 
229
  assert coords_update.get("__type__") == "update"
230
  assert img_update.get("__type__") == "update"
231
+ assert log_update["value"] == "Executing: B"
232
  assert suppress_flag is False
233
  assert log_state == {
234
  "preserve_terminal_log": False,
235
  "terminal_log_value": None,
236
  "preserve_execute_video_log": True,
237
+ "execute_video_log_value": "Executing: B",
238
  }
239
 
240