Spaces:

yuqingluo0509
/

sound_generation_user_study

Sleeping

App Files Files Community

yuqingluo0509 committed on Mar 21, 2025

Commit

a65a571

1 Parent(s): d2772ff

modified app.py

Browse files

Files changed (2) hide show

app.py +2 -0
old_app.py +155 -0

app.py CHANGED Viewed

@@ -19,6 +19,8 @@ print("HF Token is none?", hf_token is None)
 # image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
 question = ["Which one sounds more realistic"]
 # Load audio-image pairs and image-audio pairs with random sampling
 def load_random_pairs():
     random.seed(time.time())

 # image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
 question = ["Which one sounds more realistic"]
+model_name = []
 # Load audio-image pairs and image-audio pairs with random sampling
 def load_random_pairs():
     random.seed(time.time())

old_app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import gradio as gr
+import json
+import os
+import random
+import time
+from datetime import datetime
+from huggingface_hub import Repository
+import subprocess
+# Set Git user information
+subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
+subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])
+hf_token = os.getenv("HF_TOKEN")
+print("HF Token is none?", hf_token is None)
+# Define questions for both sets
+# audio_image_questions = ["Ignore the image quality, which image best matches the audio?"]
+# image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
+question = ["Which one sounds more realistic"]
+# Load audio-image pairs and image-audio pairs with random sampling
+def load_random_pairs():
+    random.seed(time.time())
+    video_pairs = []
+    video_mappings = {}
+    gt_videos = random.sample(os.listdir("./gt/"), k=3) # k=num_sample
+    groups = [gt_videos[i::4] for i in range(4)]
+    sarf_dirs = [
+        "./sarf_no_cavp/",
+        "./sarf_no_cavp_no_clip/",
+        "./sarf_no_cavp_no_hand/",
+        "./sarf_no_cavp_single_view/"
+    ]
+    for group, sarf_dir in zip(groups, sarf_dirs):
+        for name in group:
+            fn = name
+            sarf_video_path = f"{sarf_dir}{fn}"
+            pair = (f"./gt/{fn}", sarf_video_path)
+            sarf_dir_name = sarf_dir.split('/')[1]
+            mapping = ("gt", sarf_dir_name)
+            if random.random() > 0.5:
+                pair = (pair[1], pair[0])
+                mapping = (sarf_dir_name, "gt")
+            video_pairs.append(pair)
+            video_mappings[fn] = {} # fn=gt_path
+            video_mappings[fn]["Video 1"] = mapping[0]
+            video_mappings[fn]["Video 2"] = mapping[1]
+    random.shuffle(video_pairs)
+    print("load pairs after shuffle:")
+    print(video_pairs)
+    print("load mapping:")
+    print(video_mappings)
+    return video_pairs, video_mappings
+# Initialize the Hugging Face repository
+repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
+repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)
+def save_responses(*responses):
+    global video_mappings
+    global video_pairs
+    session_id = f"session_{int(time.time())}"
+    data = {
+        "user_id": session_id,
+        "timestamp": datetime.now().isoformat(),
+        "video_responses": []
+    }
+    for i, (video1_path, video2_path) in enumerate(video_pairs):
+        gt_path = ""
+        if "gt" in video1_path:
+            gt_path = video1_path
+        else:
+            gt_path = video2_path
+        gt_name = gt_path.split('/')[-1]
+        selected_video = responses[i]
+        original_video_index = video_mappings[gt_name][selected_video]
+        data["video_responses"].append({
+            # "video_1": os.path.basename(video1_path),
+            # "video_2": os.path.basename(video2_path),
+            "video_1": video1_path,
+            "video_2": video2_path,
+            "selected_video": f"{original_video_index}"
+        })
+    print("save response:")
+    print(data)
+    response_file = f"user_responses/{session_id}_responses.json"
+    os.makedirs("user_responses", exist_ok=True)
+    with open(response_file, "w") as f:
+        json.dump(data, f, indent=4)
+    repo.git_pull()
+    repo.push_to_hub()
+    return "All responses saved! Thank you."
+def load_and_update():
+    global video_mappings
+    global video_pairs
+    video_pairs, video_mappings = load_random_pairs()
+    flat_output = []
+    for video1, video2 in video_pairs:
+        flat_output.extend([video1, video2])
+    return flat_output
+def create_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("## Sound Generation User Study")
+        gr.Markdown("Below are 32 pairs of videos with virtual hand interacting with the scene, making different sounds.")
+        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
+        gr.Markdown("* It may take some time to load all the videos. We are appreciated for your time and patience!")
+        video_components = []
+        video_responses = []
+        for idx in range(3): # k=num_samples
+            gr.Markdown(f"### Pair {idx + 1}")
+            with gr.Row():
+                video1 = gr.Video(label="Video 1")
+                video2 = gr.Video(label="Video 2")
+                radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
+            gr.Markdown("---")
+            video_components.extend([video1, video2])
+            video_responses.append(radio)
+        submit_btn = gr.Button("Submit All")
+        result_message = gr.Textbox(label="Message", interactive=False)
+        def validate_and_save(*responses):
+            if any(response is None for response in responses):
+                return "Please answer all questions before submitting."
+            return save_responses(*responses)
+        demo.load(
+            fn=load_and_update,
+            inputs=None,
+            outputs=video_components[:6]  # Displaying two videos for each pair
+        )
+        submit_btn.click(
+            fn=validate_and_save,
+            inputs=video_responses,
+            outputs=result_message
+        )
+    return demo
+demo = create_interface()  # build the Blocks UI when the module runs
+demo.launch(share=True)  # start the Gradio app; share=True asks for a public link