Anthony Liang commited on
Commit
1886c12
·
1 Parent(s): 6fd468d

one more time

Browse files
Files changed (1) hide show
  1. app.py +740 -793
app.py CHANGED
@@ -597,54 +597,24 @@ def process_two_videos(
597
 
598
 
599
 
600
- # Custom CSS for collapsible sidebar
601
- custom_css = """
602
- #sidebar-toggle {
603
- position: sticky;
604
- top: 10px;
605
- z-index: 1000;
606
- margin-bottom: 10px;
607
- }
608
-
609
- /* Make sidebar full height */
610
- .gradio-container .gradio-row:has(#sidebar-column) {
611
- min-height: calc(100vh - 200px);
612
- }
613
-
614
- /* Smooth transition for sidebar (if supported) */
615
- #sidebar-column {
616
- transition: width 0.3s ease;
617
- }
618
- """
619
-
620
  # Create Gradio interface
621
  try:
622
  # Try with theme (Gradio 4.0+)
623
- demo = gr.Blocks(title="RFM Evaluation Server", theme=gr.themes.Soft(), css=custom_css)
624
  except TypeError:
625
  # Fallback for older Gradio versions without theme support
626
- try:
627
- demo = gr.Blocks(title="RFM Evaluation Server", css=custom_css)
628
- except TypeError:
629
- # Even older versions may not support css parameter
630
- demo = gr.Blocks(title="RFM Evaluation Server")
631
 
632
  with demo:
633
- # Header with title and toggle button
634
- with gr.Row():
635
- sidebar_toggle_btn = gr.Button("☰ Hide Sidebar", variant="secondary", size="sm", elem_id="sidebar-toggle", scale=0)
636
- with gr.Column(scale=1):
637
- gr.Markdown(
638
- """
639
- # RFM (Reward Foundation Model) Evaluation Server
640
- """,
641
- elem_id="main-title",
642
- )
643
 
644
  # Hidden state to store server URL and model mapping (define before use)
645
  server_url_state = gr.State(value=None)
646
  model_url_mapping_state = gr.State(value={}) # Maps model_name -> server_url
647
- sidebar_visible_state = gr.State(value=True) # Track sidebar visibility
648
 
649
  # Function definitions for event handlers
650
  def discover_and_select_models(base_url: str):
@@ -727,812 +697,789 @@ with demo:
727
  server_url,
728
  )
729
 
730
- # Toggle function for sidebar
731
- def toggle_sidebar(visible):
732
- """Toggle sidebar visibility."""
733
- new_visible = not visible
734
- button_text = "☰ Show Sidebar" if not new_visible else "☰ Hide Sidebar"
735
- return (
736
- new_visible, # Update state
737
- gr.update(visible=new_visible), # Update sidebar visibility
738
- gr.update(value=button_text), # Update button text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739
  )
740
 
741
- # Main layout with sidebar and content area
742
- with gr.Row():
743
- # Sidebar for model selection and info (controlled by visibility state)
744
- sidebar_column = gr.Column(scale=1, min_width=300, visible=True, elem_id="sidebar-column")
745
- with sidebar_column:
746
- gr.Markdown("### 🔧 Model Configuration")
747
-
748
- base_url_input = gr.Textbox(
749
- label="Base Server URL",
750
- placeholder="http://40.119.56.66",
751
- value="http://40.119.56.66",
752
- interactive=True,
753
- )
754
-
755
- discover_btn = gr.Button("🔍 Discover Models", variant="primary", size="lg")
756
-
757
- model_dropdown = gr.Dropdown(
758
- label="Select Model",
759
- choices=[],
760
- value=None,
761
- interactive=True,
762
- info="Models will be discovered on ports 8000-8010",
763
- )
764
-
765
- server_status = gr.Markdown(
766
- "Click 'Discover Models' to find available models",
767
- visible=True,
768
- )
769
-
770
- gr.Markdown("---")
771
- gr.Markdown("### 📋 Model Information")
772
- model_info_display = gr.Markdown("", visible=True)
773
-
774
- # Event handlers for sidebar
775
- discover_btn.click(
776
- fn=discover_and_select_models,
777
- inputs=[base_url_input],
778
- outputs=[model_dropdown, server_status, model_info_display, server_url_state, model_url_mapping_state],
779
- )
780
-
781
- model_dropdown.change(
782
- fn=on_model_selected,
783
- inputs=[model_dropdown, model_url_mapping_state],
784
- outputs=[server_status, model_info_display, server_url_state],
785
- )
786
-
787
- # Main content area with tabs
788
- with gr.Column(scale=4):
789
- with gr.Tabs():
790
- with gr.Tab("Progress Prediction"):
791
- gr.Markdown("### Progress & Success Prediction")
792
- gr.Markdown("Upload a video or select one from a dataset to get progress predictions.")
793
-
794
- with gr.Row():
795
- with gr.Column():
796
- single_video_input = gr.Video(label="Upload Video", height=300)
797
- task_text_input = gr.Textbox(
798
- label="Task Description",
799
- placeholder="Describe the task (e.g., 'Pick up the red block')",
800
- value="Complete the task",
801
- )
802
- fps_input_single = gr.Slider(
803
- label="FPS (Frames Per Second)",
804
- minimum=0.1,
805
- maximum=10.0,
806
- value=1.0,
807
- step=0.1,
808
- info="Frames per second to extract from video (higher = more frames)",
809
- )
810
- analyze_single_btn = gr.Button("Analyze Video", variant="primary")
811
-
812
- gr.Markdown("---")
813
- gr.Markdown("**OR Select from Dataset**")
814
- gr.Markdown("---")
815
-
816
- with gr.Accordion("📁 Select from Dataset", open=False):
817
- dataset_name_single = gr.Dropdown(
818
- choices=PREDEFINED_DATASETS,
819
- value="jesbu1/oxe_rfm",
820
- label="Dataset Name",
821
- allow_custom_value=True,
822
- )
823
- config_name_single = gr.Dropdown(
824
- choices=[], value="", label="Configuration Name", allow_custom_value=True
825
- )
826
- with gr.Row():
827
- refresh_configs_btn = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
828
- load_dataset_btn = gr.Button("Load Dataset", variant="secondary", size="sm")
829
-
830
- dataset_status_single = gr.Markdown("", visible=False)
831
- with gr.Row():
832
- prev_traj_btn = gr.Button("⬅️ Prev", variant="secondary", size="sm")
833
- trajectory_slider = gr.Slider(
834
- minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
835
- )
836
- next_traj_btn = gr.Button("Next ➡️", variant="secondary", size="sm")
837
- trajectory_metadata = gr.Markdown("", visible=False)
838
- use_dataset_video_btn = gr.Button("Use Selected Video", variant="secondary")
839
-
840
- with gr.Column():
841
- progress_plot = gr.Image(label="Progress & Success Prediction", height=400)
842
- info_output = gr.Markdown("")
843
-
844
- # State variables for dataset
845
- current_dataset_single = gr.State(None)
846
-
847
- def update_config_choices_single(dataset_name):
848
- """Update config choices when dataset changes."""
849
- if not dataset_name:
850
- return gr.update(choices=[], value="")
851
- try:
852
- configs = get_available_configs(dataset_name)
853
- if configs:
854
- return gr.update(choices=configs, value=configs[0])
855
- else:
856
- return gr.update(choices=[], value="")
857
- except Exception as e:
858
- logger.warning(f"Could not fetch configs: {e}")
859
- return gr.update(choices=[], value="")
860
-
861
- def load_dataset_single(dataset_name, config_name):
862
- """Load dataset and update slider."""
863
- dataset, status = load_rfm_dataset(dataset_name, config_name)
864
- if dataset is not None:
865
- max_index = len(dataset) - 1
866
- return (
867
- dataset,
868
- gr.update(value=status, visible=True),
869
- gr.update(
870
- maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
871
- ),
872
- )
873
- else:
874
- return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
875
-
876
- def use_dataset_video(dataset, index, dataset_name):
877
- """Load video from dataset and update inputs."""
878
- if dataset is None:
879
- return (
880
- None,
881
- "Complete the task",
882
- gr.update(value="No dataset loaded", visible=True),
883
- gr.update(visible=False),
884
- )
885
-
886
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
887
- if video_path:
888
- # Build metadata text
889
- metadata_lines = []
890
- if quality_label:
891
- metadata_lines.append(f"**Quality Label:** {quality_label}")
892
- if partial_success is not None:
893
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
894
-
895
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
896
- status_text = f"✅ Loaded trajectory {index} from dataset"
897
- if metadata_text:
898
- status_text += f"\n\n{metadata_text}"
899
-
900
- return (
901
- video_path,
902
- task,
903
- gr.update(value=status_text, visible=True),
904
- gr.update(value=metadata_text, visible=bool(metadata_text)),
905
- )
906
- else:
907
- return (
908
- None,
909
- "Complete the task",
910
- gr.update(value="❌ Error loading trajectory", visible=True),
911
- gr.update(visible=False),
912
- )
913
 
914
- def next_trajectory(dataset, current_idx, dataset_name):
915
- """Go to next trajectory."""
916
- if dataset is None:
917
- return 0, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
918
- next_idx = min(current_idx + 1, len(dataset) - 1)
919
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
920
- dataset, next_idx, dataset_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
921
  )
922
-
923
- if video_path:
924
- # Build metadata text
925
- metadata_lines = []
926
- if quality_label:
927
- metadata_lines.append(f"**Quality Label:** {quality_label}")
928
- if partial_success is not None:
929
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
930
-
931
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
932
- return (
933
- next_idx,
934
- video_path,
935
- task,
936
- gr.update(value=metadata_text, visible=bool(metadata_text)),
937
- gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
938
- )
939
- else:
940
- return current_idx, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
941
-
942
- def prev_trajectory(dataset, current_idx, dataset_name):
943
- """Go to previous trajectory."""
944
- if dataset is None:
945
- return 0, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
946
- prev_idx = max(current_idx - 1, 0)
947
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
948
- dataset, prev_idx, dataset_name
949
  )
950
-
951
- if video_path:
952
- # Build metadata text
953
- metadata_lines = []
954
- if quality_label:
955
- metadata_lines.append(f"**Quality Label:** {quality_label}")
956
- if partial_success is not None:
957
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
958
-
959
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
960
- return (
961
- prev_idx,
962
- video_path,
963
- task,
964
- gr.update(value=metadata_text, visible=bool(metadata_text)),
965
- gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
966
- )
967
- else:
968
- return current_idx, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
969
-
970
- def update_trajectory_on_slider_change(dataset, index, dataset_name):
971
- """Update trajectory metadata when slider changes."""
972
- if dataset is None:
973
- return gr.update(visible=False), gr.update(visible=False)
974
-
975
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
976
- if video_path:
977
- # Build metadata text
978
- metadata_lines = []
979
- if quality_label:
980
- metadata_lines.append(f"**Quality Label:** {quality_label}")
981
- if partial_success is not None:
982
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
983
-
984
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
985
- return (
986
- gr.update(value=metadata_text, visible=bool(metadata_text)),
987
- gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
988
  )
989
- else:
990
- return gr.update(visible=False), gr.update(visible=False)
 
991
 
992
- # Dataset selection handlers
993
- dataset_name_single.change(
994
- fn=update_config_choices_single, inputs=[dataset_name_single], outputs=[config_name_single]
995
- )
996
 
997
- refresh_configs_btn.click(
998
- fn=update_config_choices_single, inputs=[dataset_name_single], outputs=[config_name_single]
999
- )
1000
 
1001
- load_dataset_btn.click(
1002
- fn=load_dataset_single,
1003
- inputs=[dataset_name_single, config_name_single],
1004
- outputs=[current_dataset_single, dataset_status_single, trajectory_slider],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1005
  )
1006
-
1007
- use_dataset_video_btn.click(
1008
- fn=use_dataset_video,
1009
- inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
1010
- outputs=[single_video_input, task_text_input, dataset_status_single, trajectory_metadata],
 
 
 
 
 
 
1011
  )
1012
 
1013
- # Navigation buttons
1014
- next_traj_btn.click(
1015
- fn=next_trajectory,
1016
- inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
1017
- outputs=[
1018
- trajectory_slider,
1019
- single_video_input,
1020
- task_text_input,
1021
- trajectory_metadata,
1022
- dataset_status_single,
1023
- ],
 
 
 
 
 
 
 
 
1024
  )
1025
-
1026
- prev_traj_btn.click(
1027
- fn=prev_trajectory,
1028
- inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
1029
- outputs=[
1030
- trajectory_slider,
1031
- single_video_input,
1032
- task_text_input,
1033
- trajectory_metadata,
1034
- dataset_status_single,
1035
- ],
1036
  )
1037
 
1038
- # Update metadata when slider changes
1039
- trajectory_slider.change(
1040
- fn=update_trajectory_on_slider_change,
1041
- inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
1042
- outputs=[trajectory_metadata, dataset_status_single],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
  )
1044
-
1045
- analyze_single_btn.click(
1046
- fn=process_single_video,
1047
- inputs=[single_video_input, task_text_input, server_url_state, fps_input_single],
1048
- outputs=[progress_plot, info_output],
1049
- api_name="process_single_video",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1050
  )
 
 
1051
 
1052
- with gr.Tab("Preference/Similarity Analysis"):
1053
- gr.Markdown("### Preference & Similarity Prediction")
1054
- with gr.Row():
1055
- with gr.Column():
1056
- video_a_input = gr.Video(label="Video A", height=250)
1057
- video_b_input = gr.Video(label="Video B", height=250)
1058
- task_text_dual = gr.Textbox(
1059
- label="Task Description",
1060
- placeholder="Describe the task",
1061
- value="Complete the task",
1062
- )
1063
- prediction_type = gr.Radio(
1064
- choices=["preference", "similarity", "progress"],
1065
- value="preference",
1066
- label="Prediction Type",
1067
- )
1068
- fps_input_dual = gr.Slider(
1069
- label="FPS (Frames Per Second)",
1070
- minimum=0.1,
1071
- maximum=10.0,
1072
- value=1.0,
1073
- step=0.1,
1074
- info="Frames per second to extract from videos (higher = more frames)",
1075
- )
1076
- analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
1077
-
1078
- gr.Markdown("---")
1079
- gr.Markdown("**OR Select from Dataset**")
1080
- gr.Markdown("---")
1081
-
1082
- with gr.Accordion("📁 Video A - Select from Dataset", open=False):
1083
- dataset_name_a = gr.Dropdown(
1084
- choices=PREDEFINED_DATASETS,
1085
- value="jesbu1/oxe_rfm",
1086
- label="Dataset Name",
1087
- allow_custom_value=True,
1088
- )
1089
- config_name_a = gr.Dropdown(
1090
- choices=[], value="", label="Configuration Name", allow_custom_value=True
1091
- )
1092
- with gr.Row():
1093
- refresh_configs_btn_a = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
1094
- load_dataset_btn_a = gr.Button("Load Dataset", variant="secondary", size="sm")
1095
-
1096
- dataset_status_a = gr.Markdown("", visible=False)
1097
- with gr.Row():
1098
- prev_traj_btn_a = gr.Button("⬅️ Prev", variant="secondary", size="sm")
1099
- trajectory_slider_a = gr.Slider(
1100
- minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
1101
- )
1102
- next_traj_btn_a = gr.Button("Next ➡️", variant="secondary", size="sm")
1103
- trajectory_metadata_a = gr.Markdown("", visible=False)
1104
- use_dataset_video_btn_a = gr.Button("Use Selected Video for A", variant="secondary")
1105
-
1106
- with gr.Accordion("📁 Video B - Select from Dataset", open=False):
1107
- dataset_name_b = gr.Dropdown(
1108
- choices=PREDEFINED_DATASETS,
1109
- value="jesbu1/oxe_rfm",
1110
- label="Dataset Name",
1111
- allow_custom_value=True,
1112
- )
1113
- config_name_b = gr.Dropdown(
1114
- choices=[], value="", label="Configuration Name", allow_custom_value=True
1115
- )
1116
- with gr.Row():
1117
- refresh_configs_btn_b = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
1118
- load_dataset_btn_b = gr.Button("Load Dataset", variant="secondary", size="sm")
1119
-
1120
- dataset_status_b = gr.Markdown("", visible=False)
1121
- with gr.Row():
1122
- prev_traj_btn_b = gr.Button("⬅️ Prev", variant="secondary", size="sm")
1123
- trajectory_slider_b = gr.Slider(
1124
- minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
1125
- )
1126
- next_traj_btn_b = gr.Button("Next ➡️", variant="secondary", size="sm")
1127
- trajectory_metadata_b = gr.Markdown("", visible=False)
1128
- use_dataset_video_btn_b = gr.Button("Use Selected Video for B", variant="secondary")
1129
-
1130
- with gr.Column():
1131
- # Videos displayed side by side
1132
- with gr.Row():
1133
- video_a_display = gr.Video(label="Video A", height=400)
1134
- video_b_display = gr.Video(label="Video B", height=400)
1135
-
1136
- # Result text at the bottom
1137
- result_text = gr.Markdown("")
1138
-
1139
- # State variables for datasets
1140
- current_dataset_a = gr.State(None)
1141
- current_dataset_b = gr.State(None)
1142
-
1143
- # Helper functions for Video A
1144
- def update_config_choices_a(dataset_name):
1145
- """Update config choices for Video A when dataset changes."""
1146
- if not dataset_name:
1147
- return gr.update(choices=[], value="")
1148
- try:
1149
- configs = get_available_configs(dataset_name)
1150
- if configs:
1151
- return gr.update(choices=configs, value=configs[0])
1152
- else:
1153
- return gr.update(choices=[], value="")
1154
- except Exception as e:
1155
- logger.warning(f"Could not fetch configs: {e}")
1156
- return gr.update(choices=[], value="")
1157
-
1158
- def load_dataset_a(dataset_name, config_name):
1159
- """Load dataset A and update slider."""
1160
- dataset, status = load_rfm_dataset(dataset_name, config_name)
1161
- if dataset is not None:
1162
- max_index = len(dataset) - 1
1163
- return (
1164
- dataset,
1165
- gr.update(value=status, visible=True),
1166
- gr.update(
1167
- maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
1168
- ),
1169
- )
1170
- else:
1171
- return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
1172
-
1173
- def use_dataset_video_a(dataset, index, dataset_name):
1174
- """Load video A from dataset and update input."""
1175
- if dataset is None:
1176
- return (
1177
- None,
1178
- gr.update(value="No dataset loaded", visible=True),
1179
- gr.update(visible=False),
1180
- )
1181
 
1182
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1183
- if video_path:
1184
- # Build metadata text
1185
- metadata_lines = []
1186
- if quality_label:
1187
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1188
- if partial_success is not None:
1189
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1190
-
1191
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1192
- status_text = f"✅ Loaded trajectory {index} from dataset for Video A"
1193
- if metadata_text:
1194
- status_text += f"\n\n{metadata_text}"
1195
-
1196
- return (
1197
- video_path,
1198
- gr.update(value=status_text, visible=True),
1199
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1200
- )
1201
- else:
1202
- return (
1203
- None,
1204
- gr.update(value="❌ Error loading trajectory", visible=True),
1205
- gr.update(visible=False),
1206
- )
1207
 
1208
- def next_trajectory_a(dataset, current_idx, dataset_name):
1209
- """Go to next trajectory for Video A."""
1210
- if dataset is None:
1211
- return 0, None, gr.update(visible=False), gr.update(visible=False)
1212
- next_idx = min(current_idx + 1, len(dataset) - 1)
1213
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
1214
- dataset, next_idx, dataset_name
1215
- )
1216
 
1217
- if video_path:
1218
- # Build metadata text
1219
- metadata_lines = []
1220
- if quality_label:
1221
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1222
- if partial_success is not None:
1223
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1224
-
1225
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1226
- return (
1227
- next_idx,
1228
- video_path,
1229
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1230
- gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1231
- )
1232
- else:
1233
- return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1234
-
1235
- def prev_trajectory_a(dataset, current_idx, dataset_name):
1236
- """Go to previous trajectory for Video A."""
1237
- if dataset is None:
1238
- return 0, None, gr.update(visible=False), gr.update(visible=False)
1239
- prev_idx = max(current_idx - 1, 0)
1240
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
1241
- dataset, prev_idx, dataset_name
1242
- )
1243
 
1244
- if video_path:
1245
- # Build metadata text
1246
- metadata_lines = []
1247
- if quality_label:
1248
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1249
- if partial_success is not None:
1250
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1251
-
1252
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1253
- return (
1254
- prev_idx,
1255
- video_path,
1256
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1257
- gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
1258
- )
1259
- else:
1260
- return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1261
-
1262
- def update_trajectory_on_slider_change_a(dataset, index, dataset_name):
1263
- """Update trajectory metadata when slider changes for Video A."""
1264
- if dataset is None:
1265
- return gr.update(visible=False), gr.update(visible=False)
1266
-
1267
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1268
- if video_path:
1269
- # Build metadata text
1270
- metadata_lines = []
1271
- if quality_label:
1272
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1273
- if partial_success is not None:
1274
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1275
-
1276
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1277
- return (
1278
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1279
- gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
1280
- )
1281
- else:
1282
- return gr.update(visible=False), gr.update(visible=False)
1283
-
1284
- # Helper functions for Video B (same as Video A)
1285
- def update_config_choices_b(dataset_name):
1286
- """Update config choices for Video B when dataset changes."""
1287
- if not dataset_name:
1288
- return gr.update(choices=[], value="")
1289
- try:
1290
- configs = get_available_configs(dataset_name)
1291
- if configs:
1292
- return gr.update(choices=configs, value=configs[0])
1293
- else:
1294
- return gr.update(choices=[], value="")
1295
- except Exception as e:
1296
- logger.warning(f"Could not fetch configs: {e}")
1297
- return gr.update(choices=[], value="")
1298
-
1299
- def load_dataset_b(dataset_name, config_name):
1300
- """Load dataset B and update slider."""
1301
- dataset, status = load_rfm_dataset(dataset_name, config_name)
1302
- if dataset is not None:
1303
- max_index = len(dataset) - 1
1304
- return (
1305
- dataset,
1306
- gr.update(value=status, visible=True),
1307
- gr.update(
1308
- maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
1309
- ),
1310
- )
1311
- else:
1312
- return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
1313
-
1314
- def use_dataset_video_b(dataset, index, dataset_name):
1315
- """Load video B from dataset and update input."""
1316
- if dataset is None:
1317
- return (
1318
- None,
1319
- gr.update(value="No dataset loaded", visible=True),
1320
- gr.update(visible=False),
1321
- )
1322
 
1323
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1324
- if video_path:
1325
- # Build metadata text
1326
- metadata_lines = []
1327
- if quality_label:
1328
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1329
- if partial_success is not None:
1330
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1331
-
1332
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1333
- status_text = f"✅ Loaded trajectory {index} from dataset for Video B"
1334
- if metadata_text:
1335
- status_text += f"\n\n{metadata_text}"
1336
-
1337
- return (
1338
- video_path,
1339
- gr.update(value=status_text, visible=True),
1340
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1341
- )
1342
- else:
1343
- return (
1344
- None,
1345
- gr.update(value="❌ Error loading trajectory", visible=True),
1346
- gr.update(visible=False),
1347
- )
1348
 
1349
- def next_trajectory_b(dataset, current_idx, dataset_name):
1350
- """Go to next trajectory for Video B."""
1351
- if dataset is None:
1352
- return 0, None, gr.update(visible=False), gr.update(visible=False)
1353
- next_idx = min(current_idx + 1, len(dataset) - 1)
1354
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
1355
- dataset, next_idx, dataset_name
1356
- )
1357
 
1358
- if video_path:
1359
- # Build metadata text
1360
- metadata_lines = []
1361
- if quality_label:
1362
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1363
- if partial_success is not None:
1364
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1365
-
1366
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1367
- return (
1368
- next_idx,
1369
- video_path,
1370
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1371
- gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1372
- )
1373
- else:
1374
- return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1375
-
1376
- def prev_trajectory_b(dataset, current_idx, dataset_name):
1377
- """Go to previous trajectory for Video B."""
1378
- if dataset is None:
1379
- return 0, None, gr.update(visible=False), gr.update(visible=False)
1380
- prev_idx = max(current_idx - 1, 0)
1381
- video_path, task, quality_label, partial_success = get_trajectory_video_path(
1382
- dataset, prev_idx, dataset_name
1383
- )
1384
 
1385
- if video_path:
1386
- # Build metadata text
1387
- metadata_lines = []
1388
- if quality_label:
1389
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1390
- if partial_success is not None:
1391
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1392
-
1393
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1394
- return (
1395
- prev_idx,
1396
- video_path,
1397
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1398
- gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1399
  )
1400
- else:
1401
- return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1402
-
1403
- def update_trajectory_on_slider_change_b(dataset, index, dataset_name):
1404
- """Update trajectory metadata when slider changes for Video B."""
1405
- if dataset is None:
1406
- return gr.update(visible=False), gr.update(visible=False)
1407
-
1408
- video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1409
- if video_path:
1410
- # Build metadata text
1411
- metadata_lines = []
1412
- if quality_label:
1413
- metadata_lines.append(f"**Quality Label:** {quality_label}")
1414
- if partial_success is not None:
1415
- metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1416
-
1417
- metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1418
- return (
1419
- gr.update(value=metadata_text, visible=bool(metadata_text)),
1420
- gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
 
 
1421
  )
1422
- else:
1423
- return gr.update(visible=False), gr.update(visible=False)
 
1424
 
1425
- # Video A dataset selection handlers
1426
- dataset_name_a.change(
1427
- fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1428
  )
1429
-
1430
- refresh_configs_btn_a.click(
1431
- fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
 
 
 
 
 
 
 
1432
  )
1433
 
1434
- load_dataset_btn_a.click(
1435
- fn=load_dataset_a,
1436
- inputs=[dataset_name_a, config_name_a],
1437
- outputs=[current_dataset_a, dataset_status_a, trajectory_slider_a],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1438
  )
1439
-
1440
- use_dataset_video_btn_a.click(
1441
- fn=use_dataset_video_a,
1442
- inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1443
- outputs=[video_a_input, dataset_status_a, trajectory_metadata_a],
1444
  )
1445
 
1446
- next_traj_btn_a.click(
1447
- fn=next_trajectory_a,
1448
- inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1449
- outputs=[
1450
- trajectory_slider_a,
1451
- video_a_input,
1452
- trajectory_metadata_a,
1453
- dataset_status_a,
1454
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1455
  )
1456
-
1457
- prev_traj_btn_a.click(
1458
- fn=prev_trajectory_a,
1459
- inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1460
- outputs=[
1461
- trajectory_slider_a,
1462
- video_a_input,
1463
- trajectory_metadata_a,
1464
- dataset_status_a,
1465
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1466
  )
1467
-
1468
- trajectory_slider_a.change(
1469
- fn=update_trajectory_on_slider_change_a,
1470
- inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1471
- outputs=[trajectory_metadata_a, dataset_status_a],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1472
  )
 
 
1473
 
1474
- # Video B dataset selection handlers
1475
- dataset_name_b.change(
1476
- fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1477
  )
1478
-
1479
- refresh_configs_btn_b.click(
1480
- fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
 
 
 
 
 
 
 
1481
  )
1482
 
1483
- load_dataset_btn_b.click(
1484
- fn=load_dataset_b,
1485
- inputs=[dataset_name_b, config_name_b],
1486
- outputs=[current_dataset_b, dataset_status_b, trajectory_slider_b],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1487
  )
1488
-
1489
- use_dataset_video_btn_b.click(
1490
- fn=use_dataset_video_b,
1491
- inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1492
- outputs=[video_b_input, dataset_status_b, trajectory_metadata_b],
1493
  )
1494
 
1495
- next_traj_btn_b.click(
1496
- fn=next_trajectory_b,
1497
- inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1498
- outputs=[
1499
- trajectory_slider_b,
1500
- video_b_input,
1501
- trajectory_metadata_b,
1502
- dataset_status_b,
1503
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1504
  )
1505
-
1506
- prev_traj_btn_b.click(
1507
- fn=prev_trajectory_b,
1508
- inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1509
- outputs=[
1510
- trajectory_slider_b,
1511
- video_b_input,
1512
- trajectory_metadata_b,
1513
- dataset_status_b,
1514
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1515
  )
1516
-
1517
- trajectory_slider_b.change(
1518
- fn=update_trajectory_on_slider_change_b,
1519
- inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1520
- outputs=[trajectory_metadata_b, dataset_status_b],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1521
  )
 
 
1522
 
1523
- analyze_dual_btn.click(
1524
- fn=process_two_videos,
1525
- inputs=[video_a_input, video_b_input, task_text_dual, prediction_type, server_url_state, fps_input_dual],
1526
- outputs=[result_text, video_a_display, video_b_display],
1527
- api_name="process_two_videos",
1528
- )
1529
 
1530
- # Wire up the sidebar toggle button (can be anywhere in the demo block)
1531
- sidebar_toggle_btn.click(
1532
- fn=toggle_sidebar,
1533
- inputs=[sidebar_visible_state],
1534
- outputs=[sidebar_visible_state, sidebar_column, sidebar_toggle_btn],
1535
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1536
 
1537
 
1538
  def main():
 
597
 
598
 
599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  # Create Gradio interface
601
  try:
602
  # Try with theme (Gradio 4.0+)
603
+ demo = gr.Blocks(title="RFM Evaluation Server", theme=gr.themes.Soft())
604
  except TypeError:
605
  # Fallback for older Gradio versions without theme support
606
+ demo = gr.Blocks(title="RFM Evaluation Server")
 
 
 
 
607
 
608
  with demo:
609
+ gr.Markdown(
610
+ """
611
+ # RFM (Reward Foundation Model) Evaluation Server
612
+ """
613
+ )
 
 
 
 
 
614
 
615
  # Hidden state to store server URL and model mapping (define before use)
616
  server_url_state = gr.State(value=None)
617
  model_url_mapping_state = gr.State(value={}) # Maps model_name -> server_url
 
618
 
619
  # Function definitions for event handlers
620
  def discover_and_select_models(base_url: str):
 
697
  server_url,
698
  )
699
 
700
+ # Use Gradio's built-in Sidebar component (collapsible by default)
701
+ with gr.Sidebar():
702
+ gr.Markdown("### 🔧 Model Configuration")
703
+
704
+ base_url_input = gr.Textbox(
705
+ label="Base Server URL",
706
+ placeholder="http://40.119.56.66",
707
+ value="http://40.119.56.66",
708
+ interactive=True,
709
+ )
710
+
711
+ discover_btn = gr.Button("🔍 Discover Models", variant="primary", size="lg")
712
+
713
+ model_dropdown = gr.Dropdown(
714
+ label="Select Model",
715
+ choices=[],
716
+ value=None,
717
+ interactive=True,
718
+ info="Models will be discovered on ports 8000-8010",
719
+ )
720
+
721
+ server_status = gr.Markdown(
722
+ "Click 'Discover Models' to find available models"
723
+ )
724
+
725
+ gr.Markdown("---")
726
+ gr.Markdown("### 📋 Model Information")
727
+ model_info_display = gr.Markdown("")
728
+
729
+ # Event handlers for sidebar
730
+ discover_btn.click(
731
+ fn=discover_and_select_models,
732
+ inputs=[base_url_input],
733
+ outputs=[model_dropdown, server_status, model_info_display, server_url_state, model_url_mapping_state],
734
  )
735
 
736
+ model_dropdown.change(
737
+ fn=on_model_selected,
738
+ inputs=[model_dropdown, model_url_mapping_state],
739
+ outputs=[server_status, model_info_display, server_url_state],
740
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
+ # Main content area with tabs
743
+ with gr.Tabs():
744
+ with gr.Tab("Progress Prediction"):
745
+ gr.Markdown("### Progress & Success Prediction")
746
+ gr.Markdown("Upload a video or select one from a dataset to get progress predictions.")
747
+
748
+ with gr.Row():
749
+ with gr.Column():
750
+ single_video_input = gr.Video(label="Upload Video", height=300)
751
+ task_text_input = gr.Textbox(
752
+ label="Task Description",
753
+ placeholder="Describe the task (e.g., 'Pick up the red block')",
754
+ value="Complete the task",
755
+ )
756
+ fps_input_single = gr.Slider(
757
+ label="FPS (Frames Per Second)",
758
+ minimum=0.1,
759
+ maximum=10.0,
760
+ value=1.0,
761
+ step=0.1,
762
+ info="Frames per second to extract from video (higher = more frames)",
763
+ )
764
+ analyze_single_btn = gr.Button("Analyze Video", variant="primary")
765
+
766
+ gr.Markdown("---")
767
+ gr.Markdown("**OR Select from Dataset**")
768
+ gr.Markdown("---")
769
+
770
+ with gr.Accordion("📁 Select from Dataset", open=False):
771
+ dataset_name_single = gr.Dropdown(
772
+ choices=PREDEFINED_DATASETS,
773
+ value="jesbu1/oxe_rfm",
774
+ label="Dataset Name",
775
+ allow_custom_value=True,
776
  )
777
+ config_name_single = gr.Dropdown(
778
+ choices=[], value="", label="Configuration Name", allow_custom_value=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
779
  )
780
+ with gr.Row():
781
+ refresh_configs_btn = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
782
+ load_dataset_btn = gr.Button("Load Dataset", variant="secondary", size="sm")
783
+
784
+ dataset_status_single = gr.Markdown("", visible=False)
785
+ with gr.Row():
786
+ prev_traj_btn = gr.Button("⬅️ Prev", variant="secondary", size="sm")
787
+ trajectory_slider = gr.Slider(
788
+ minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
789
  )
790
+ next_traj_btn = gr.Button("Next ➡️", variant="secondary", size="sm")
791
+ trajectory_metadata = gr.Markdown("", visible=False)
792
+ use_dataset_video_btn = gr.Button("Use Selected Video", variant="secondary")
793
 
794
+ with gr.Column():
795
+ progress_plot = gr.Image(label="Progress & Success Prediction", height=400)
796
+ info_output = gr.Markdown("")
 
797
 
798
+ # State variables for dataset
799
+ current_dataset_single = gr.State(None)
 
800
 
801
+ def update_config_choices_single(dataset_name):
802
+ """Update config choices when dataset changes."""
803
+ if not dataset_name:
804
+ return gr.update(choices=[], value="")
805
+ try:
806
+ configs = get_available_configs(dataset_name)
807
+ if configs:
808
+ return gr.update(choices=configs, value=configs[0])
809
+ else:
810
+ return gr.update(choices=[], value="")
811
+ except Exception as e:
812
+ logger.warning(f"Could not fetch configs: {e}")
813
+ return gr.update(choices=[], value="")
814
+
815
+ def load_dataset_single(dataset_name, config_name):
816
+ """Load dataset and update slider."""
817
+ dataset, status = load_rfm_dataset(dataset_name, config_name)
818
+ if dataset is not None:
819
+ max_index = len(dataset) - 1
820
+ return (
821
+ dataset,
822
+ gr.update(value=status, visible=True),
823
+ gr.update(
824
+ maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
825
+ ),
826
  )
827
+ else:
828
+ return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
829
+
830
+ def use_dataset_video(dataset, index, dataset_name):
831
+ """Load video from dataset and update inputs."""
832
+ if dataset is None:
833
+ return (
834
+ None,
835
+ "Complete the task",
836
+ gr.update(value="No dataset loaded", visible=True),
837
+ gr.update(visible=False),
838
  )
839
 
840
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
841
+ if video_path:
842
+ # Build metadata text
843
+ metadata_lines = []
844
+ if quality_label:
845
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
846
+ if partial_success is not None:
847
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
848
+
849
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
850
+ status_text = f"✅ Loaded trajectory {index} from dataset"
851
+ if metadata_text:
852
+ status_text += f"\n\n{metadata_text}"
853
+
854
+ return (
855
+ video_path,
856
+ task,
857
+ gr.update(value=status_text, visible=True),
858
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
859
  )
860
+ else:
861
+ return (
862
+ None,
863
+ "Complete the task",
864
+ gr.update(value="❌ Error loading trajectory", visible=True),
865
+ gr.update(visible=False),
 
 
 
 
 
866
  )
867
 
868
+ def next_trajectory(dataset, current_idx, dataset_name):
869
+ """Go to next trajectory."""
870
+ if dataset is None:
871
+ return 0, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
872
+ next_idx = min(current_idx + 1, len(dataset) - 1)
873
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
874
+ dataset, next_idx, dataset_name
875
+ )
876
+
877
+ if video_path:
878
+ # Build metadata text
879
+ metadata_lines = []
880
+ if quality_label:
881
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
882
+ if partial_success is not None:
883
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
884
+
885
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
886
+ return (
887
+ next_idx,
888
+ video_path,
889
+ task,
890
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
891
+ gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
892
  )
893
+ else:
894
+ return current_idx, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
895
+
896
+ def prev_trajectory(dataset, current_idx, dataset_name):
897
+ """Go to previous trajectory."""
898
+ if dataset is None:
899
+ return 0, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
900
+ prev_idx = max(current_idx - 1, 0)
901
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
902
+ dataset, prev_idx, dataset_name
903
+ )
904
+
905
+ if video_path:
906
+ # Build metadata text
907
+ metadata_lines = []
908
+ if quality_label:
909
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
910
+ if partial_success is not None:
911
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
912
+
913
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
914
+ return (
915
+ prev_idx,
916
+ video_path,
917
+ task,
918
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
919
+ gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
920
+ )
921
+ else:
922
+ return current_idx, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)
923
+
924
+ def update_trajectory_on_slider_change(dataset, index, dataset_name):
925
+ """Update trajectory metadata when slider changes."""
926
+ if dataset is None:
927
+ return gr.update(visible=False), gr.update(visible=False)
928
+
929
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
930
+ if video_path:
931
+ # Build metadata text
932
+ metadata_lines = []
933
+ if quality_label:
934
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
935
+ if partial_success is not None:
936
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
937
+
938
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
939
+ return (
940
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
941
+ gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
942
  )
943
+ else:
944
+ return gr.update(visible=False), gr.update(visible=False)
945
 
946
+ # Dataset selection handlers
947
+ dataset_name_single.change(
948
+ fn=update_config_choices_single, inputs=[dataset_name_single], outputs=[config_name_single]
949
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
950
 
951
+ refresh_configs_btn.click(
952
+ fn=update_config_choices_single, inputs=[dataset_name_single], outputs=[config_name_single]
953
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
954
 
955
+ load_dataset_btn.click(
956
+ fn=load_dataset_single,
957
+ inputs=[dataset_name_single, config_name_single],
958
+ outputs=[current_dataset_single, dataset_status_single, trajectory_slider],
959
+ )
 
 
 
960
 
961
+ use_dataset_video_btn.click(
962
+ fn=use_dataset_video,
963
+ inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
964
+ outputs=[single_video_input, task_text_input, dataset_status_single, trajectory_metadata],
965
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
966
 
967
+ # Navigation buttons
968
+ next_traj_btn.click(
969
+ fn=next_trajectory,
970
+ inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
971
+ outputs=[
972
+ trajectory_slider,
973
+ single_video_input,
974
+ task_text_input,
975
+ trajectory_metadata,
976
+ dataset_status_single,
977
+ ],
978
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
979
 
980
+ prev_traj_btn.click(
981
+ fn=prev_trajectory,
982
+ inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
983
+ outputs=[
984
+ trajectory_slider,
985
+ single_video_input,
986
+ task_text_input,
987
+ trajectory_metadata,
988
+ dataset_status_single,
989
+ ],
990
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
 
992
+ # Update metadata when slider changes
993
+ trajectory_slider.change(
994
+ fn=update_trajectory_on_slider_change,
995
+ inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
996
+ outputs=[trajectory_metadata, dataset_status_single],
997
+ )
 
 
998
 
999
+ analyze_single_btn.click(
1000
+ fn=process_single_video,
1001
+ inputs=[single_video_input, task_text_input, server_url_state, fps_input_single],
1002
+ outputs=[progress_plot, info_output],
1003
+ api_name="process_single_video",
1004
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1005
 
1006
+ with gr.Tab("Preference/Similarity Analysis"):
1007
+ gr.Markdown("### Preference & Similarity Prediction")
1008
+ with gr.Row():
1009
+ with gr.Column():
1010
+ video_a_input = gr.Video(label="Video A", height=250)
1011
+ video_b_input = gr.Video(label="Video B", height=250)
1012
+ task_text_dual = gr.Textbox(
1013
+ label="Task Description",
1014
+ placeholder="Describe the task",
1015
+ value="Complete the task",
1016
+ )
1017
+ prediction_type = gr.Radio(
1018
+ choices=["preference", "similarity", "progress"],
1019
+ value="preference",
1020
+ label="Prediction Type",
1021
+ )
1022
+ fps_input_dual = gr.Slider(
1023
+ label="FPS (Frames Per Second)",
1024
+ minimum=0.1,
1025
+ maximum=10.0,
1026
+ value=1.0,
1027
+ step=0.1,
1028
+ info="Frames per second to extract from videos (higher = more frames)",
1029
+ )
1030
+ analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
1031
+
1032
+ gr.Markdown("---")
1033
+ gr.Markdown("**OR Select from Dataset**")
1034
+ gr.Markdown("---")
1035
+
1036
+ with gr.Accordion("📁 Video A - Select from Dataset", open=False):
1037
+ dataset_name_a = gr.Dropdown(
1038
+ choices=PREDEFINED_DATASETS,
1039
+ value="jesbu1/oxe_rfm",
1040
+ label="Dataset Name",
1041
+ allow_custom_value=True,
1042
+ )
1043
+ config_name_a = gr.Dropdown(
1044
+ choices=[], value="", label="Configuration Name", allow_custom_value=True
1045
+ )
1046
+ with gr.Row():
1047
+ refresh_configs_btn_a = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
1048
+ load_dataset_btn_a = gr.Button("Load Dataset", variant="secondary", size="sm")
1049
+
1050
+ dataset_status_a = gr.Markdown("", visible=False)
1051
+ with gr.Row():
1052
+ prev_traj_btn_a = gr.Button("⬅️ Prev", variant="secondary", size="sm")
1053
+ trajectory_slider_a = gr.Slider(
1054
+ minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
1055
  )
1056
+ next_traj_btn_a = gr.Button("Next ➡️", variant="secondary", size="sm")
1057
+ trajectory_metadata_a = gr.Markdown("", visible=False)
1058
+ use_dataset_video_btn_a = gr.Button("Use Selected Video for A", variant="secondary")
1059
+
1060
+ with gr.Accordion("📁 Video B - Select from Dataset", open=False):
1061
+ dataset_name_b = gr.Dropdown(
1062
+ choices=PREDEFINED_DATASETS,
1063
+ value="jesbu1/oxe_rfm",
1064
+ label="Dataset Name",
1065
+ allow_custom_value=True,
1066
+ )
1067
+ config_name_b = gr.Dropdown(
1068
+ choices=[], value="", label="Configuration Name", allow_custom_value=True
1069
+ )
1070
+ with gr.Row():
1071
+ refresh_configs_btn_b = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
1072
+ load_dataset_btn_b = gr.Button("Load Dataset", variant="secondary", size="sm")
1073
+
1074
+ dataset_status_b = gr.Markdown("", visible=False)
1075
+ with gr.Row():
1076
+ prev_traj_btn_b = gr.Button("⬅️ Prev", variant="secondary", size="sm")
1077
+ trajectory_slider_b = gr.Slider(
1078
+ minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
1079
  )
1080
+ next_traj_btn_b = gr.Button("Next ➡️", variant="secondary", size="sm")
1081
+ trajectory_metadata_b = gr.Markdown("", visible=False)
1082
+ use_dataset_video_btn_b = gr.Button("Use Selected Video for B", variant="secondary")
1083
 
1084
+ with gr.Column():
1085
+ # Videos displayed side by side
1086
+ with gr.Row():
1087
+ video_a_display = gr.Video(label="Video A", height=400)
1088
+ video_b_display = gr.Video(label="Video B", height=400)
1089
+
1090
+ # Result text at the bottom
1091
+ result_text = gr.Markdown("")
1092
+
1093
+ # State variables for datasets
1094
+ current_dataset_a = gr.State(None)
1095
+ current_dataset_b = gr.State(None)
1096
+
1097
+ # Helper functions for Video A
1098
+ def update_config_choices_a(dataset_name):
1099
+ """Update config choices for Video A when dataset changes."""
1100
+ if not dataset_name:
1101
+ return gr.update(choices=[], value="")
1102
+ try:
1103
+ configs = get_available_configs(dataset_name)
1104
+ if configs:
1105
+ return gr.update(choices=configs, value=configs[0])
1106
+ else:
1107
+ return gr.update(choices=[], value="")
1108
+ except Exception as e:
1109
+ logger.warning(f"Could not fetch configs: {e}")
1110
+ return gr.update(choices=[], value="")
1111
+
1112
+ def load_dataset_a(dataset_name, config_name):
1113
+ """Load dataset A and update slider."""
1114
+ dataset, status = load_rfm_dataset(dataset_name, config_name)
1115
+ if dataset is not None:
1116
+ max_index = len(dataset) - 1
1117
+ return (
1118
+ dataset,
1119
+ gr.update(value=status, visible=True),
1120
+ gr.update(
1121
+ maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
1122
+ ),
1123
  )
1124
+ else:
1125
+ return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
1126
+
1127
+ def use_dataset_video_a(dataset, index, dataset_name):
1128
+ """Load video A from dataset and update input."""
1129
+ if dataset is None:
1130
+ return (
1131
+ None,
1132
+ gr.update(value="No dataset loaded", visible=True),
1133
+ gr.update(visible=False),
1134
  )
1135
 
1136
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1137
+ if video_path:
1138
+ # Build metadata text
1139
+ metadata_lines = []
1140
+ if quality_label:
1141
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1142
+ if partial_success is not None:
1143
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1144
+
1145
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1146
+ status_text = f"✅ Loaded trajectory {index} from dataset for Video A"
1147
+ if metadata_text:
1148
+ status_text += f"\n\n{metadata_text}"
1149
+
1150
+ return (
1151
+ video_path,
1152
+ gr.update(value=status_text, visible=True),
1153
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1154
  )
1155
+ else:
1156
+ return (
1157
+ None,
1158
+ gr.update(value="❌ Error loading trajectory", visible=True),
1159
+ gr.update(visible=False),
1160
  )
1161
 
1162
+ def next_trajectory_a(dataset, current_idx, dataset_name):
1163
+ """Go to next trajectory for Video A."""
1164
+ if dataset is None:
1165
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1166
+ next_idx = min(current_idx + 1, len(dataset) - 1)
1167
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1168
+ dataset, next_idx, dataset_name
1169
+ )
1170
+
1171
+ if video_path:
1172
+ # Build metadata text
1173
+ metadata_lines = []
1174
+ if quality_label:
1175
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1176
+ if partial_success is not None:
1177
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1178
+
1179
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1180
+ return (
1181
+ next_idx,
1182
+ video_path,
1183
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1184
+ gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1185
  )
1186
+ else:
1187
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1188
+
1189
+ def prev_trajectory_a(dataset, current_idx, dataset_name):
1190
+ """Go to previous trajectory for Video A."""
1191
+ if dataset is None:
1192
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1193
+ prev_idx = max(current_idx - 1, 0)
1194
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1195
+ dataset, prev_idx, dataset_name
1196
+ )
1197
+
1198
+ if video_path:
1199
+ # Build metadata text
1200
+ metadata_lines = []
1201
+ if quality_label:
1202
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1203
+ if partial_success is not None:
1204
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1205
+
1206
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1207
+ return (
1208
+ prev_idx,
1209
+ video_path,
1210
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1211
+ gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
1212
  )
1213
+ else:
1214
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1215
+
1216
+ def update_trajectory_on_slider_change_a(dataset, index, dataset_name):
1217
+ """Update trajectory metadata when slider changes for Video A."""
1218
+ if dataset is None:
1219
+ return gr.update(visible=False), gr.update(visible=False)
1220
+
1221
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1222
+ if video_path:
1223
+ # Build metadata text
1224
+ metadata_lines = []
1225
+ if quality_label:
1226
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1227
+ if partial_success is not None:
1228
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1229
+
1230
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1231
+ return (
1232
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1233
+ gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
1234
  )
1235
+ else:
1236
+ return gr.update(visible=False), gr.update(visible=False)
1237
 
1238
+ # Helper functions for Video B (same as Video A)
1239
+ def update_config_choices_b(dataset_name):
1240
+ """Update config choices for Video B when dataset changes."""
1241
+ if not dataset_name:
1242
+ return gr.update(choices=[], value="")
1243
+ try:
1244
+ configs = get_available_configs(dataset_name)
1245
+ if configs:
1246
+ return gr.update(choices=configs, value=configs[0])
1247
+ else:
1248
+ return gr.update(choices=[], value="")
1249
+ except Exception as e:
1250
+ logger.warning(f"Could not fetch configs: {e}")
1251
+ return gr.update(choices=[], value="")
1252
+
1253
+ def load_dataset_b(dataset_name, config_name):
1254
+ """Load dataset B and update slider."""
1255
+ dataset, status = load_rfm_dataset(dataset_name, config_name)
1256
+ if dataset is not None:
1257
+ max_index = len(dataset) - 1
1258
+ return (
1259
+ dataset,
1260
+ gr.update(value=status, visible=True),
1261
+ gr.update(
1262
+ maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
1263
+ ),
1264
  )
1265
+ else:
1266
+ return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
1267
+
1268
+ def use_dataset_video_b(dataset, index, dataset_name):
1269
+ """Load video B from dataset and update input."""
1270
+ if dataset is None:
1271
+ return (
1272
+ None,
1273
+ gr.update(value="No dataset loaded", visible=True),
1274
+ gr.update(visible=False),
1275
  )
1276
 
1277
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1278
+ if video_path:
1279
+ # Build metadata text
1280
+ metadata_lines = []
1281
+ if quality_label:
1282
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1283
+ if partial_success is not None:
1284
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1285
+
1286
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1287
+ status_text = f"✅ Loaded trajectory {index} from dataset for Video B"
1288
+ if metadata_text:
1289
+ status_text += f"\n\n{metadata_text}"
1290
+
1291
+ return (
1292
+ video_path,
1293
+ gr.update(value=status_text, visible=True),
1294
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1295
  )
1296
+ else:
1297
+ return (
1298
+ None,
1299
+ gr.update(value="❌ Error loading trajectory", visible=True),
1300
+ gr.update(visible=False),
1301
  )
1302
 
1303
+ def next_trajectory_b(dataset, current_idx, dataset_name):
1304
+ """Go to next trajectory for Video B."""
1305
+ if dataset is None:
1306
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1307
+ next_idx = min(current_idx + 1, len(dataset) - 1)
1308
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1309
+ dataset, next_idx, dataset_name
1310
+ )
1311
+
1312
+ if video_path:
1313
+ # Build metadata text
1314
+ metadata_lines = []
1315
+ if quality_label:
1316
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1317
+ if partial_success is not None:
1318
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1319
+
1320
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1321
+ return (
1322
+ next_idx,
1323
+ video_path,
1324
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1325
+ gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1326
  )
1327
+ else:
1328
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1329
+
1330
+ def prev_trajectory_b(dataset, current_idx, dataset_name):
1331
+ """Go to previous trajectory for Video B."""
1332
+ if dataset is None:
1333
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1334
+ prev_idx = max(current_idx - 1, 0)
1335
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1336
+ dataset, prev_idx, dataset_name
1337
+ )
1338
+
1339
+ if video_path:
1340
+ # Build metadata text
1341
+ metadata_lines = []
1342
+ if quality_label:
1343
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1344
+ if partial_success is not None:
1345
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1346
+
1347
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1348
+ return (
1349
+ prev_idx,
1350
+ video_path,
1351
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1352
+ gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
1353
  )
1354
+ else:
1355
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1356
+
1357
+ def update_trajectory_on_slider_change_b(dataset, index, dataset_name):
1358
+ """Update trajectory metadata when slider changes for Video B."""
1359
+ if dataset is None:
1360
+ return gr.update(visible=False), gr.update(visible=False)
1361
+
1362
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1363
+ if video_path:
1364
+ # Build metadata text
1365
+ metadata_lines = []
1366
+ if quality_label:
1367
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1368
+ if partial_success is not None:
1369
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1370
+
1371
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1372
+ return (
1373
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1374
+ gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
1375
  )
1376
+ else:
1377
+ return gr.update(visible=False), gr.update(visible=False)
1378
 
1379
+ # Video A dataset selection handlers
1380
+ dataset_name_a.change(
1381
+ fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
1382
+ )
 
 
1383
 
1384
+ refresh_configs_btn_a.click(
1385
+ fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
1386
+ )
1387
+
1388
+ load_dataset_btn_a.click(
1389
+ fn=load_dataset_a,
1390
+ inputs=[dataset_name_a, config_name_a],
1391
+ outputs=[current_dataset_a, dataset_status_a, trajectory_slider_a],
1392
+ )
1393
+
1394
+ use_dataset_video_btn_a.click(
1395
+ fn=use_dataset_video_a,
1396
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1397
+ outputs=[video_a_input, dataset_status_a, trajectory_metadata_a],
1398
+ )
1399
+
1400
+ next_traj_btn_a.click(
1401
+ fn=next_trajectory_a,
1402
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1403
+ outputs=[
1404
+ trajectory_slider_a,
1405
+ video_a_input,
1406
+ trajectory_metadata_a,
1407
+ dataset_status_a,
1408
+ ],
1409
+ )
1410
+
1411
+ prev_traj_btn_a.click(
1412
+ fn=prev_trajectory_a,
1413
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1414
+ outputs=[
1415
+ trajectory_slider_a,
1416
+ video_a_input,
1417
+ trajectory_metadata_a,
1418
+ dataset_status_a,
1419
+ ],
1420
+ )
1421
+
1422
+ trajectory_slider_a.change(
1423
+ fn=update_trajectory_on_slider_change_a,
1424
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1425
+ outputs=[trajectory_metadata_a, dataset_status_a],
1426
+ )
1427
+
1428
+ # Video B dataset selection handlers
1429
+ dataset_name_b.change(
1430
+ fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
1431
+ )
1432
+
1433
+ refresh_configs_btn_b.click(
1434
+ fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
1435
+ )
1436
+
1437
+ load_dataset_btn_b.click(
1438
+ fn=load_dataset_b,
1439
+ inputs=[dataset_name_b, config_name_b],
1440
+ outputs=[current_dataset_b, dataset_status_b, trajectory_slider_b],
1441
+ )
1442
+
1443
+ use_dataset_video_btn_b.click(
1444
+ fn=use_dataset_video_b,
1445
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1446
+ outputs=[video_b_input, dataset_status_b, trajectory_metadata_b],
1447
+ )
1448
+
1449
+ next_traj_btn_b.click(
1450
+ fn=next_trajectory_b,
1451
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1452
+ outputs=[
1453
+ trajectory_slider_b,
1454
+ video_b_input,
1455
+ trajectory_metadata_b,
1456
+ dataset_status_b,
1457
+ ],
1458
+ )
1459
+
1460
+ prev_traj_btn_b.click(
1461
+ fn=prev_trajectory_b,
1462
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1463
+ outputs=[
1464
+ trajectory_slider_b,
1465
+ video_b_input,
1466
+ trajectory_metadata_b,
1467
+ dataset_status_b,
1468
+ ],
1469
+ )
1470
+
1471
+ trajectory_slider_b.change(
1472
+ fn=update_trajectory_on_slider_change_b,
1473
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1474
+ outputs=[trajectory_metadata_b, dataset_status_b],
1475
+ )
1476
+
1477
+ analyze_dual_btn.click(
1478
+ fn=process_two_videos,
1479
+ inputs=[video_a_input, video_b_input, task_text_dual, prediction_type, server_url_state, fps_input_dual],
1480
+ outputs=[result_text, video_a_display, video_b_display],
1481
+ api_name="process_two_videos",
1482
+ )
1483
 
1484
 
1485
  def main():