Spaces:
Paused
Paused
| import os | |
| import shutil | |
| import gradio as gr | |
| import zipfile | |
| import tempfile | |
| from infer.modules.train.preprocess import PreProcess | |
| from typing import Tuple | |
| from huggingface_hub import snapshot_download | |
def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
    """Extract a zip archive and list the audio files at its top level.

    The whole archive is unpacked into ``target_dir``, but only entries that
    sit directly in ``target_dir`` (not in sub-folders) are reported.

    Args:
        zip_file: Path to the zip archive to unpack.
        target_dir: Directory that receives the extracted files.

    Returns:
        Sorted paths of the top-level ``.wav`` / ``.mp3`` / ``.ogg`` files.

    Raises:
        gr.Error: If no audio file is found at the top level of the archive.
    """
    audio_suffixes = (".wav", ".mp3", ".ogg")

    with zipfile.ZipFile(zip_file, "r") as archive:
        archive.extractall(target_dir)

    # Extensions are compared case-insensitively so files such as
    # ``TAKE1.WAV`` are not rejected; sorting makes the result independent of
    # the order in which the filesystem happens to list the directory.
    audio_files = sorted(
        os.path.join(target_dir, name)
        for name in os.listdir(target_dir)
        if name.lower().endswith(audio_suffixes)
    )
    if not audio_files:
        raise gr.Error("No audio files found at the top level of the zip file")
    return audio_files
def create_new_expdir(zip_file: str) -> Tuple[str, str]:
    """Create a new experiment directory from an uploaded dataset zip.

    A fresh temporary directory is created, the archive is unpacked into its
    ``_data`` sub-folder, and ``PreProcess`` is run over that folder.

    Args:
        zip_file: Path to the uploaded zip archive of audio files.

    Returns:
        A ``(experiment_dir, message)`` tuple; ``message`` reports how many
        audio files were found, followed by the preprocessing log.

    Raises:
        gr.Error: If no file was uploaded, or the archive contains no audio
            files at its top level.
    """
    if not zip_file:
        # The button can be clicked before anything is uploaded.
        raise gr.Error("Please upload a zip file containing audio files")

    temp_dir = tempfile.mkdtemp()
    print(f"Using exp dir: {temp_dir}")
    try:
        data_dir = os.path.join(temp_dir, "_data")
        os.makedirs(data_dir)
        audio_files = extract_audio_files(zip_file, data_dir)

        # NOTE(review): the positional arguments are presumably sample rate,
        # experiment dir, slice length and a "no parallel" flag, and the
        # trailing 4 a worker count -- confirm against PreProcess.
        pp = PreProcess(40000, temp_dir, 3.0, False)
        pp.pipeline_mp_inp_dir(data_dir, 4)

        # Rewind the log so everything written so far is read back.
        pp.logfile.seek(0)
        log = pp.logfile.read()
    except Exception:
        # mkdtemp() never cleans up after itself; do not leave a half-built
        # experiment directory behind when setup fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return temp_dir, f"Preprocessed {len(audio_files)} audio files.\n{log}"
def restore_expdir(zip: str) -> str:
    """Unpack an experiment archive into a fresh temporary directory.

    Args:
        zip: Path to the uploaded experiment archive. (The name shadows the
            builtin but is kept so existing callers keep working.)

    Returns:
        Path of the temporary directory holding the unpacked experiment.

    Raises:
        gr.Error: If no file was uploaded.
        shutil.ReadError: If the file is not a readable archive.
    """
    if not zip:
        # The button can be clicked before anything is uploaded.
        raise gr.Error("Please upload an experiment directory zip file")

    exp_dir = tempfile.mkdtemp()
    try:
        shutil.unpack_archive(zip, exp_dir)
    except Exception:
        # mkdtemp() never cleans up after itself; remove the directory when
        # unpacking fails so failed restores do not pile up on disk.
        shutil.rmtree(exp_dir, ignore_errors=True)
        raise
    return exp_dir
def restore_from_huggingface(repo: str, token: str) -> str:
    """Download a Hugging Face repo snapshot into a new temporary directory.

    Args:
        repo: Repository id passed to ``snapshot_download``.
        token: Access token; it is forwarded only when it starts with
            ``"hf_"``, otherwise the download is attempted without a token.

    Returns:
        The download directory: a fresh temporary directory joined with the
        lower-cased repo id.
    """
    scratch_root = tempfile.mkdtemp()
    exp_dir = os.path.join(scratch_root, repo.lower())

    # Anything that does not look like an "hf_..." token is treated as absent.
    auth_token = token if token.startswith("hf_") else None

    snapshot_download(repo, local_dir=exp_dir, token=auth_token)
    return exp_dir
def set_dir(dir_val: str) -> str:
    """Validate a manually entered experiment directory under ``/tmp``.

    A relative value is resolved against ``/tmp``. The result is normalised
    and must be located inside ``/tmp``; values that escape it (an absolute
    path elsewhere, or ``..`` components) are rejected.

    Args:
        dir_val: Directory typed by the user, absolute or relative to
            ``/tmp``.

    Returns:
        The normalised directory path.

    Raises:
        gr.Error: If the path lies outside ``/tmp`` or is not an existing
            directory.
    """
    tmp_root = "/tmp"

    # os.path.join keeps an absolute dir_val as-is and anchors a relative one
    # under /tmp; normpath then collapses any ".." components.
    candidate = os.path.normpath(os.path.join(tmp_root, dir_val))

    # A plain startswith("/tmp/") check accepts "/tmp/../etc", so containment
    # is verified on fully resolved paths instead.
    real_root = os.path.realpath(tmp_root)
    real_candidate = os.path.realpath(candidate)
    if os.path.commonpath([real_root, real_candidate]) != real_root:
        raise gr.Error("Directory must be inside /tmp")

    if not os.path.isdir(candidate):
        raise gr.Error("Directory does not exist")
    return candidate
class SetupTab:
    """Gradio tab used to create, restore, or manually select an experiment.

    ``ui()`` creates the components and stores them on the instance;
    ``build()`` then attaches the click handlers, so ``ui()`` has to run
    first.
    """

    def __init__(self):
        """Nothing to initialise; components are created in ``ui()``."""

    def ui(self):
        """Create the tab's components in display order."""
        # NOTE(review): the source this was recovered from had lost its
        # indentation, so the Row/Column nesting below is a reconstruction --
        # confirm the layout against the running app.
        gr.Markdown("# Setup Experiment")
        gr.Markdown(
            "You can upload a zip file containing audio files to start a new experiment, or upload an experiment directory zip file to restore an existing experiment.\n"
            "The suggested dataset size is > 5 minutes of audio."
        )

        # --- Start a new experiment from a dataset archive ---
        with gr.Row():
            with gr.Column():
                self.zip_file = gr.File(
                    label="Upload a zip file containing audio files for training",
                    file_types=["zip"],
                )
                self.preprocess_log = gr.Textbox(label="Log", lines=5)
                self.preprocess_btn = gr.Button(
                    value="Start New Experiment",
                    variant="primary",
                )

        # --- Restore from an experiment archive ---
        with gr.Row():
            self.restore_zip_file = gr.File(
                label="Upload the experiment directory zip file",
                file_types=["zip"],
            )
            self.restore_btn = gr.Button(
                value="Restore Experiment",
                variant="primary",
            )

        # --- Restore from a Hugging Face repo ---
        gr.Markdown("You can also restore from a Hugging Face repo.")
        with gr.Row():
            self.hf_repo = gr.Textbox(
                label="Restore from Hugging Face repo",
                placeholder="username/repo",
            )
            self.hf_token = gr.Textbox(
                label="Hugging Face token (optional)",
                placeholder="hf_...",
            )
            self.restore_hf_btn = gr.Button(value="Restore from Hugging Face")

        # --- Point at an existing directory by hand ---
        with gr.Row():
            self.dir_val = gr.Textbox(
                label="Manually set the experiment directory (don't touch it unless you know what you are doing)",
                placeholder="/tmp/...",
            )
            self.set_dir_btn = gr.Button(value="Set Directory")

    def build(self, exp_dir: gr.Textbox):
        """Attach click handlers; each one writes its result into ``exp_dir``.

        Args:
            exp_dir: Textbox that receives the selected experiment directory.
        """
        # (button, handler, input components, output components)
        wiring = (
            (
                self.preprocess_btn,
                create_new_expdir,
                [self.zip_file],
                [exp_dir, self.preprocess_log],
            ),
            (
                self.restore_btn,
                restore_expdir,
                [self.restore_zip_file],
                [exp_dir],
            ),
            (
                self.restore_hf_btn,
                restore_from_huggingface,
                [self.hf_repo, self.hf_token],
                [exp_dir],
            ),
            (
                self.set_dir_btn,
                set_dir,
                [self.dir_val],
                [exp_dir],
            ),
        )
        for button, handler, sources, targets in wiring:
            button.click(fn=handler, inputs=sources, outputs=targets)