Spaces:

AksharaSharma
/

voice-authenticity-openenv

Running

voice authenticity openenv - initial submission

72983a7 1 day ago

850 Bytes

	from datasets import load_dataset
	import soundfile as sf
	import os

	# Export a small, class-balanced subset of the deepfake-audio dataset as
	# WAV files under data/<class>/<class>_NNNN.wav.

	# Maximum number of clips written for each class.
	SAMPLES_PER_CLASS = 250

	# Dataset label id -> class name used for the output directory and file
	# prefix. The 0=real / 1=fake mapping follows the original author's note;
	# NOTE(review): confirm against the dataset card.
	LABEL_NAMES = {0: "real", 1: "fake"}

	for class_name in LABEL_NAMES.values():
	    os.makedirs(f"data/{class_name}", exist_ok=True)

	dataset = load_dataset("garystafford/deepfake-audio-detection", split="train")

	# Clips written so far, keyed by class name.
	counts = {class_name: 0 for class_name in LABEL_NAMES.values()}

	for item in dataset:
	    class_name = LABEL_NAMES.get(item["label"])

	    # Rows with an unknown label, or whose class quota is already full,
	    # are skipped without writing anything.
	    if class_name is not None and counts[class_name] < SAMPLES_PER_CLASS:
	        audio = item["audio"]
	        sf.write(
	            f"data/{class_name}/{class_name}_{counts[class_name]:04d}.wav",
	            audio["array"],
	            audio["sampling_rate"],
	        )
	        counts[class_name] += 1

	    # Stop iterating as soon as every class has reached its quota.
	    if all(count >= SAMPLES_PER_CLASS for count in counts.values()):
	        break

	print(f"Downloaded: {counts['real']} real, {counts['fake']} fake")