yuqingluo0509 committed on
Commit
a65a571
·
1 Parent(s): d2772ff

modified app.py

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. old_app.py +155 -0
app.py CHANGED
@@ -19,6 +19,8 @@ print("HF Token is none?", hf_token is None)
19
  # image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
20
  question = ["Which one sounds more realistic"]
21
 
 
 
22
  # Load audio-image pairs and image-audio pairs with random sampling
23
  def load_random_pairs():
24
  random.seed(time.time())
 
19
  # image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
20
  question = ["Which one sounds more realistic"]
21
 
22
+ model_name = []
23
+
24
  # Load audio-image pairs and image-audio pairs with random sampling
25
  def load_random_pairs():
26
  random.seed(time.time())
old_app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import random
5
+ import time
6
+ from datetime import datetime
7
+ from huggingface_hub import Repository
8
+ import subprocess
9
+
10
# Set Git user information
# Configure a global git identity so commits pushed to the HF dataset
# repository (cloned below) are attributed to this account.
subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])

# Hugging Face access token from the environment; None if unset.
hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Define questions for both sets
# audio_image_questions = ["Ignore the image quality, which image best matches the audio?"]
# image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
question = ["Which one sounds more realistic"]
21
+
22
# Load audio-image pairs and image-audio pairs with random sampling
def load_random_pairs():
    """Sample ground-truth clips, pair each with an ablation variant, and
    return (video_pairs, video_mappings).

    video_pairs    -- list of (path_a, path_b) tuples in shuffled order.
    video_mappings -- dict keyed by ground-truth file name mapping the
        display labels "Video 1"/"Video 2" to "gt" or the ablation
        directory name, so responses can be de-anonymized on save.
    """
    # Re-seed from the wall clock so each call draws a fresh sample.
    random.seed(time.time())

    video_pairs = []
    video_mappings = {}

    gt_videos = random.sample(os.listdir("./gt/"), k=3)  # k=num_sample
    # NOTE(review): 3 samples strided into 4 groups leaves at least one
    # group (and its ablation directory) empty — confirm k is meant to
    # match len(sarf_dirs).
    groups = [gt_videos[i::4] for i in range(4)]
    sarf_dirs = [
        "./sarf_no_cavp/",
        "./sarf_no_cavp_no_clip/",
        "./sarf_no_cavp_no_hand/",
        "./sarf_no_cavp_single_view/"
    ]

    for group, sarf_dir in zip(groups, sarf_dirs):
        for name in group:
            fn = name
            # Assumes the ablation dir holds a file with the same name as
            # the ground-truth clip — TODO confirm.
            sarf_video_path = f"{sarf_dir}{fn}"
            pair = (f"./gt/{fn}", sarf_video_path)
            # "./sarf_x/".split('/')[1] -> "sarf_x" (the directory name).
            sarf_dir_name = sarf_dir.split('/')[1]
            mapping = ("gt", sarf_dir_name)

            # Randomly swap sides so "Video 1" is not always ground truth.
            if random.random() > 0.5:
                pair = (pair[1], pair[0])
                mapping = (sarf_dir_name, "gt")

            video_pairs.append(pair)
            video_mappings[fn] = {}  # fn=gt_path
            video_mappings[fn]["Video 1"] = mapping[0]
            video_mappings[fn]["Video 2"] = mapping[1]

    random.shuffle(video_pairs)
    print("load pairs after shuffle:")
    print(video_pairs)
    print("load mapping:")
    print(video_mappings)

    return video_pairs, video_mappings
62
+
63
# Initialize the Hugging Face repository
# Clones the response dataset into ./user_responses so saved JSON files
# can be committed and pushed back with the token above.
repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)
66
+
67
def save_responses(*responses):
    """Write one session's choices to a JSON file and push it to the hub.

    responses -- one "Video 1"/"Video 2" selection per pair, in the same
        order as the global video_pairs.
    Returns a confirmation string shown in the UI.
    """
    global video_mappings
    global video_pairs
    # Second-resolution unix timestamp doubles as the session/user id.
    session_id = f"session_{int(time.time())}"
    data = {
        "user_id": session_id,
        "timestamp": datetime.now().isoformat(),
        "video_responses": []
    }

    for i, (video1_path, video2_path) in enumerate(video_pairs):
        # Find the ground-truth side of the pair; its file name is the key
        # into video_mappings.
        # NOTE(review): relies on "gt" appearing only in ground-truth
        # paths — an ablation path or file name containing "gt" would
        # mislabel. Verify against the directory layout.
        gt_path = ""
        if "gt" in video1_path:
            gt_path = video1_path
        else:
            gt_path = video2_path
        gt_name = gt_path.split('/')[-1]
        selected_video = responses[i]
        # De-anonymize: map the chosen label back to "gt"/ablation name.
        original_video_index = video_mappings[gt_name][selected_video]
        data["video_responses"].append({
            # "video_1": os.path.basename(video1_path),
            # "video_2": os.path.basename(video2_path),
            "video_1": video1_path,
            "video_2": video2_path,
            "selected_video": f"{original_video_index}"
        })
    print("save response:")
    print(data)
    response_file = f"user_responses/{session_id}_responses.json"
    os.makedirs("user_responses", exist_ok=True)
    with open(response_file, "w") as f:
        json.dump(data, f, indent=4)
    # Pull before pushing to avoid non-fast-forward rejections when
    # several users submit concurrently.
    repo.git_pull()
    repo.push_to_hub()
    return "All responses saved! Thank you."
102
+
103
def load_and_update():
    """Resample the global video pairs and return the paths as one flat
    list (pair order preserved: v1, v2, v1, v2, ...) for the UI slots."""
    global video_mappings
    global video_pairs
    video_pairs, video_mappings = load_random_pairs()

    # Flatten [(a, b), ...] -> [a, b, ...] to feed the gr.Video components.
    return [clip for pair in video_pairs for clip in pair]
112
+
113
def create_interface(num_pairs=3):
    """Build the Gradio Blocks UI for the listening study.

    num_pairs -- number of video pairs displayed (default 3); must match
        the number of pairs produced by load_random_pairs() (its
        k=num_sample). Previously the instruction text hard-coded
        "32 pairs" while only 3 were rendered; the count is now derived
        from this single parameter so text and UI stay in sync.
    Returns the gr.Blocks demo object.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Sound Generation User Study")
        gr.Markdown(f"Below are {num_pairs} pairs of videos with virtual hand interacting with the scene, making different sounds.")
        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
        gr.Markdown("* It may take some time to load all the videos. We appreciate your time and patience!")

        video_components = []
        video_responses = []
        for idx in range(num_pairs):
            gr.Markdown(f"### Pair {idx + 1}")
            with gr.Row():
                video1 = gr.Video(label="Video 1")
                video2 = gr.Video(label="Video 2")
            radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
            gr.Markdown("---")
            video_components.extend([video1, video2])
            video_responses.append(radio)

        submit_btn = gr.Button("Submit All")
        result_message = gr.Textbox(label="Message", interactive=False)

        def validate_and_save(*responses):
            # Reject partial submissions before touching the dataset repo.
            if any(response is None for response in responses):
                return "Please answer all questions before submitting."
            return save_responses(*responses)

        # Populate the video slots on page load; two videos per pair.
        demo.load(
            fn=load_and_update,
            inputs=None,
            outputs=video_components[:num_pairs * 2]
        )

        submit_btn.click(
            fn=validate_and_save,
            inputs=video_responses,
            outputs=result_message
        )

    return demo
153
+
154
# Build the UI and launch it with a public share link.
demo = create_interface()
demo.launch(share=True)