# app.py — sound-generation user study by yuqingluo0509 (commit a65a571)
import gradio as gr
import json
import os
import random
import time
from datetime import datetime
from huggingface_hub import Repository
import subprocess
# Set Git user information so commits made by the Repository helper below
# are attributed correctly (it commits via the git CLI).
subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])
# Read/write token for the responses dataset; expected to be provided via the
# HF_TOKEN environment variable (e.g. a Space secret).
hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)
# Define questions for both sets
# audio_image_questions = ["Ignore the image quality, which image best matches the audio?"]
# image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
# Single study question. NOTE(review): `question` is not referenced anywhere
# in this file — the radio label in create_interface() restates the text.
question = ["Which one sounds more realistic"]
# Load audio-image pairs and image-audio pairs with random sampling
def load_random_pairs(num_samples=3):
    """Build a shuffled list of (video_a, video_b) pairs for the study.

    Samples ``num_samples`` clip filenames from ``./gt/``, splits them
    round-robin across the four SARF ablation directories, and pairs each
    ground-truth clip with the same-named clip from its assigned ablation.
    The left/right placement of each pair is randomized, and the true source
    of "Video 1"/"Video 2" is recorded per clip filename so responses can be
    decoded later.

    Args:
        num_samples: number of ground-truth clips to sample (default 3,
            matching the number of pairs rendered by create_interface()).

    Returns:
        (video_pairs, video_mappings): a shuffled list of (path, path)
        tuples, and a dict mapping each clip filename to
        {"Video 1": source, "Video 2": source} where source is "gt" or the
        ablation directory name.
    """
    # Re-seed from the clock so each page load draws a fresh sample.
    random.seed(time.time())

    sarf_dirs = [
        "./sarf_no_cavp/",
        "./sarf_no_cavp_no_clip/",
        "./sarf_no_cavp_no_hand/",
        "./sarf_no_cavp_single_view/",
    ]

    gt_videos = random.sample(os.listdir("./gt/"), k=num_samples)
    # Round-robin split across the ablation dirs. NOTE(review): when
    # num_samples < len(sarf_dirs) some ablations get no clips (e.g. the
    # default 3 samples leave the last directory unused).
    n_dirs = len(sarf_dirs)
    groups = [gt_videos[i::n_dirs] for i in range(n_dirs)]

    video_pairs = []
    video_mappings = {}
    for group, sarf_dir in zip(groups, sarf_dirs):
        sarf_dir_name = sarf_dir.split('/')[1]  # e.g. "sarf_no_cavp"
        for fn in group:
            pair = (f"./gt/{fn}", f"{sarf_dir}{fn}")
            mapping = ("gt", sarf_dir_name)
            # Randomize placement so ground truth isn't always "Video 1".
            if random.random() > 0.5:
                pair = (pair[1], pair[0])
                mapping = (sarf_dir_name, "gt")
            video_pairs.append(pair)
            # Keyed by filename: both paths in a pair share the same basename.
            video_mappings[fn] = {"Video 1": mapping[0], "Video 2": mapping[1]}

    random.shuffle(video_pairs)
    print("load pairs after shuffle:")
    print(video_pairs)
    print("load mapping:")
    print(video_mappings)
    return video_pairs, video_mappings
# Initialize the Hugging Face repository
# Clones (or reuses) the responses dataset into ./user_responses at import
# time; save_responses() commits and pushes new JSON files there.
# NOTE(review): `use_auth_token` is deprecated in newer huggingface_hub
# releases — confirm the pinned version still accepts it.
repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)
def save_responses(*responses):
    """Persist one participant's answers as JSON and push to the HF dataset.

    Each element of ``responses`` is "Video 1" or "Video 2", one per pair in
    the global ``video_pairs``. The chosen label is decoded back to its true
    source ("gt" or an ablation name) via the global ``video_mappings``
    produced by load_random_pairs().
    """
    global video_mappings
    global video_pairs

    session_id = f"session_{int(time.time())}"

    answers = []
    for idx, (left_path, right_path) in enumerate(video_pairs):
        # Both paths in a pair share the same filename; recover it from
        # whichever side is the ground-truth clip to key into the mapping.
        gt_path = left_path if "gt" in left_path else right_path
        gt_name = gt_path.split('/')[-1]
        chosen_label = responses[idx]
        true_source = video_mappings[gt_name][chosen_label]
        answers.append({
            "video_1": left_path,
            "video_2": right_path,
            "selected_video": f"{true_source}",
        })

    data = {
        "user_id": session_id,
        "timestamp": datetime.now().isoformat(),
        "video_responses": answers,
    }
    print("save response:")
    print(data)

    os.makedirs("user_responses", exist_ok=True)
    response_file = f"user_responses/{session_id}_responses.json"
    with open(response_file, "w") as f:
        json.dump(data, f, indent=4)

    # Sync with the remote before pushing the new response file.
    repo.git_pull()
    repo.push_to_hub()
    return "All responses saved! Thank you."
def load_and_update():
    """Resample the study pairs and return their paths flattened for the UI.

    Refreshes the global ``video_pairs``/``video_mappings``, then returns
    [pair1_left, pair1_right, pair2_left, ...] — the order expected by the
    gr.Video components wired up in create_interface().
    """
    global video_mappings
    global video_pairs
    video_pairs, video_mappings = load_random_pairs()
    return [path for pair in video_pairs for path in pair]
def create_interface():
    """Build the Gradio Blocks UI for the listening study.

    Renders ``num_pairs`` side-by-side video pairs, one radio question per
    pair, and a submit button that validates all answers before delegating
    to save_responses().

    Returns:
        The constructed gr.Blocks demo (not yet launched).
    """
    # Single source of truth for the pair count; must match the sample size
    # produced by load_random_pairs(). The original hard-coded "32 pairs" in
    # the instructions and a [:6] slice of the components — both now derive
    # from this constant.
    num_pairs = 3
    with gr.Blocks() as demo:
        gr.Markdown("## Sound Generation User Study")
        gr.Markdown(f"Below are {num_pairs} pairs of videos with virtual hand interacting with the scene, making different sounds.")
        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
        gr.Markdown("* It may take some time to load all the videos. We appreciate your time and patience!")

        video_components = []
        video_responses = []
        for idx in range(num_pairs):
            gr.Markdown(f"### Pair {idx + 1}")
            with gr.Row():
                video1 = gr.Video(label="Video 1")
                video2 = gr.Video(label="Video 2")
            radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
            gr.Markdown("---")
            video_components.extend([video1, video2])
            video_responses.append(radio)

        submit_btn = gr.Button("Submit All")
        result_message = gr.Textbox(label="Message", interactive=False)

        def validate_and_save(*responses):
            # Reject submission until every pair has an answer.
            if any(response is None for response in responses):
                return "Please answer all questions before submitting."
            return save_responses(*responses)

        demo.load(
            fn=load_and_update,
            inputs=None,
            outputs=video_components,  # two videos per pair, in pair order
        )
        submit_btn.click(
            fn=validate_and_save,
            inputs=video_responses,
            outputs=result_message,
        )
    return demo
# Build and launch the app; share=True additionally exposes a public
# gradio.live link alongside the local server.
demo = create_interface()
demo.launch(share=True)