# app.py — sound-generation user study by yuqingluo0509 (commit a65a571)
import gradio as gr
import json
import os
import random
import time
from datetime import datetime
from huggingface_hub import Repository
import subprocess
# Set Git user information so commits made by the Repository helper below
# are attributed correctly (it commits via the git CLI).
subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])
# Read/write token for the responses dataset; expected to be provided via the
# HF_TOKEN environment variable (e.g. a Space secret).
hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)
# Define questions for both sets
# audio_image_questions = ["Ignore the image quality, which image best matches the audio?"]
# image_audio_questions = ["Ignore the audio quality, which audio best matches the image?"]
# Single study question. NOTE(review): `question` is not referenced anywhere
# in this file — the radio label in create_interface() restates the text.
question = ["Which one sounds more realistic"]
# Load audio-image pairs and image-audio pairs with random sampling
def load_random_pairs(num_samples=3):
    """Build a shuffled list of (video_a, video_b) pairs for the study.

    Samples ``num_samples`` clip filenames from ``./gt/``, splits them
    round-robin across the four SARF ablation directories, and pairs each
    ground-truth clip with the same-named clip from its assigned ablation.
    The left/right placement of each pair is randomized, and the true source
    of "Video 1"/"Video 2" is recorded per clip filename so responses can be
    decoded later.

    Args:
        num_samples: number of ground-truth clips to sample (default 3,
            matching the number of pairs rendered by create_interface()).

    Returns:
        (video_pairs, video_mappings): a shuffled list of (path, path)
        tuples, and a dict mapping each clip filename to
        {"Video 1": source, "Video 2": source} where source is "gt" or the
        ablation directory name.
    """
    # Re-seed from the clock so each page load draws a fresh sample.
    random.seed(time.time())

    sarf_dirs = [
        "./sarf_no_cavp/",
        "./sarf_no_cavp_no_clip/",
        "./sarf_no_cavp_no_hand/",
        "./sarf_no_cavp_single_view/",
    ]

    gt_videos = random.sample(os.listdir("./gt/"), k=num_samples)
    # Round-robin split across the ablation dirs. NOTE(review): when
    # num_samples < len(sarf_dirs) some ablations get no clips (e.g. the
    # default 3 samples leave the last directory unused).
    n_dirs = len(sarf_dirs)
    groups = [gt_videos[i::n_dirs] for i in range(n_dirs)]

    video_pairs = []
    video_mappings = {}
    for group, sarf_dir in zip(groups, sarf_dirs):
        sarf_dir_name = sarf_dir.split('/')[1]  # e.g. "sarf_no_cavp"
        for fn in group:
            pair = (f"./gt/{fn}", f"{sarf_dir}{fn}")
            mapping = ("gt", sarf_dir_name)
            # Randomize placement so ground truth isn't always "Video 1".
            if random.random() > 0.5:
                pair = (pair[1], pair[0])
                mapping = (sarf_dir_name, "gt")
            video_pairs.append(pair)
            # Keyed by filename: both paths in a pair share the same basename.
            video_mappings[fn] = {"Video 1": mapping[0], "Video 2": mapping[1]}

    random.shuffle(video_pairs)
    print("load pairs after shuffle:")
    print(video_pairs)
    print("load mapping:")
    print(video_mappings)
    return video_pairs, video_mappings
# Initialize the Hugging Face repository
# Clones (or reuses) the responses dataset into ./user_responses at import
# time; save_responses() commits and pushes new JSON files there.
# NOTE(review): `use_auth_token` is deprecated in newer huggingface_hub
# releases — confirm the pinned version still accepts it.
repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)
def save_responses(*responses):
    """Persist one participant's answers as JSON and push to the HF dataset.

    Each element of ``responses`` is "Video 1" or "Video 2", one per pair in
    the global ``video_pairs``. The chosen label is decoded back to its true
    source ("gt" or an ablation name) via the global ``video_mappings``
    produced by load_random_pairs().
    """
    global video_mappings
    global video_pairs

    session_id = f"session_{int(time.time())}"

    answers = []
    for idx, (left_path, right_path) in enumerate(video_pairs):
        # Both paths in a pair share the same filename; recover it from
        # whichever side is the ground-truth clip to key into the mapping.
        gt_path = left_path if "gt" in left_path else right_path
        gt_name = gt_path.split('/')[-1]
        chosen_label = responses[idx]
        true_source = video_mappings[gt_name][chosen_label]
        answers.append({
            "video_1": left_path,
            "video_2": right_path,
            "selected_video": f"{true_source}",
        })

    data = {
        "user_id": session_id,
        "timestamp": datetime.now().isoformat(),
        "video_responses": answers,
    }
    print("save response:")
    print(data)

    os.makedirs("user_responses", exist_ok=True)
    response_file = f"user_responses/{session_id}_responses.json"
    with open(response_file, "w") as f:
        json.dump(data, f, indent=4)

    # Sync with the remote before pushing the new response file.
    repo.git_pull()
    repo.push_to_hub()
    return "All responses saved! Thank you."
def load_and_update():
    """Resample the study pairs and return their paths flattened for the UI.

    Refreshes the global ``video_pairs``/``video_mappings``, then returns
    [pair1_left, pair1_right, pair2_left, ...] — the order expected by the
    gr.Video components wired up in create_interface().
    """
    global video_mappings
    global video_pairs
    video_pairs, video_mappings = load_random_pairs()
    return [path for pair in video_pairs for path in pair]
def create_interface():
    """Build the Gradio Blocks UI for the listening study.

    Renders ``num_pairs`` side-by-side video pairs, one radio question per
    pair, and a submit button that validates all answers before delegating
    to save_responses().

    Returns:
        The constructed gr.Blocks demo (not yet launched).
    """
    # Single source of truth for the pair count; must match the sample size
    # produced by load_random_pairs(). The original hard-coded "32 pairs" in
    # the instructions and a [:6] slice of the components — both now derive
    # from this constant.
    num_pairs = 3
    with gr.Blocks() as demo:
        gr.Markdown("## Sound Generation User Study")
        gr.Markdown(f"Below are {num_pairs} pairs of videos with virtual hand interacting with the scene, making different sounds.")
        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
        gr.Markdown("* It may take some time to load all the videos. We appreciate your time and patience!")

        video_components = []
        video_responses = []
        for idx in range(num_pairs):
            gr.Markdown(f"### Pair {idx + 1}")
            with gr.Row():
                video1 = gr.Video(label="Video 1")
                video2 = gr.Video(label="Video 2")
            radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
            gr.Markdown("---")
            video_components.extend([video1, video2])
            video_responses.append(radio)

        submit_btn = gr.Button("Submit All")
        result_message = gr.Textbox(label="Message", interactive=False)

        def validate_and_save(*responses):
            # Reject submission until every pair has an answer.
            if any(response is None for response in responses):
                return "Please answer all questions before submitting."
            return save_responses(*responses)

        demo.load(
            fn=load_and_update,
            inputs=None,
            outputs=video_components,  # two videos per pair, in pair order
        )
        submit_btn.click(
            fn=validate_and_save,
            inputs=video_responses,
            outputs=result_message,
        )
    return demo
# Build and launch the app; share=True additionally exposes a public
# gradio.live link alongside the local server.
demo = create_interface()
demo.launch(share=True)