Spaces:
Runtime error
Runtime error
| """ | |
| app.py – Gradio portal for COMP5300 voice‑cloning study (Hugging Face Spaces) | |
| ------------------------------------------------------------------------- | |
| • Consistent sentence list (prompts.txt). One prompt shown at a time. | |
| • Volunteer enters Speaker‑ID, records, clicks **Submit & Next**. | |
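| • WAV saved locally in <root>/raw/<speaker>/ – <root> is /persistent by default and /data when the app runs as a Hugging Face Space (the same root applies to meta.csv and progress.json below). | |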
| • Metadata appended to /persistent/meta.csv → speaker_id,prompt_idx,prompt_text,path | |
| • Tracks completed prompts and total recording duration in /persistent/progress.json. | |
| • Resumes from the next incomplete prompt for a given Speaker-ID. | |
| Tested on **Gradio** and **Python** in Hugging Face Spaces – May 2025. | |
| Install deps: | |
| pip install gradio soundfile numpy | |
| Run locally (for testing): | |
| python app.py --prompts prompts.txt | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import csv | |
| import datetime as dt | |
| import io | |
| from pathlib import Path | |
| from typing import List, Tuple, Union | |
| import os | |
| import json | |
| import gradio as gr | |
| import numpy as np | |
| import soundfile as sf | |
# Forms of Gradio Audio value that audio_to_wav_bytes handles:
# a (sample_rate, samples) tuple, a file-path string, or a dict.
AudioLike = Union[Tuple[int, np.ndarray], str, dict]
# Default root directory for everything the app writes.
# NOTE: the `__main__` block rebinds LOCAL_ROOT and the three paths below to
# /data when HOME == "/home/user" (its check for running in a Space).
LOCAL_ROOT = Path("/persistent")
# JSON file holding, per speaker ID, the completed prompt indices and the
# accumulated recording duration in seconds.
PROGRESS_FILE = LOCAL_ROOT / "progress.json"
# CSV file that gets one appended row per saved recording:
# speaker_id, prompt_idx, prompt_text, path.
META_FILE = LOCAL_ROOT / "meta.csv"
# Recordings are written to RAW_AUDIO_DIR / <speaker_id> / <file name>.
RAW_AUDIO_DIR = LOCAL_ROOT / "raw"
| # ----------------------------------------------------------------------------- | |
| # Helpers | |
| # ----------------------------------------------------------------------------- | |
def load_prompts(path: Path) -> List[str]:
    """Return the non-empty, whitespace-stripped lines of the prompts file.

    Args:
        path: Prompts file to read. Ignored when the ``HOME`` environment
            variable equals ``/home/user`` (this app's check for running in
            a Hugging Face Space); ``./prompts.txt`` is read instead.

    Returns:
        One prompt per non-blank line, in file order.

    Raises:
        FileNotFoundError: If the selected path is not an existing file.
    """
    if os.environ.get("HOME") == "/home/user":
        # Relative path, so it is resolved against the working directory.
        prompts_file_path = Path("./prompts.txt")
    else:
        prompts_file_path = path

    # is_file() rather than exists(): a directory at this path is reported
    # as a missing prompts file instead of failing later while reading.
    if not prompts_file_path.is_file():
        raise FileNotFoundError(f"Prompts file not found at: {prompts_file_path}")

    # "utf-8-sig" decodes plain UTF-8 as well, but drops a leading byte-order
    # mark. str.strip() does not remove U+FEFF, so with plain "utf8" a BOM
    # would stay glued to the first prompt (on screen and in meta.csv).
    text = prompts_file_path.read_text(encoding="utf-8-sig")
    return [line for line in map(str.strip, text.splitlines()) if line]
def audio_to_wav_bytes(audio: AudioLike) -> bytes:
    """Turn the value produced by the Gradio Audio component into WAV bytes.

    Handled inputs, checked in this order:
      * a 2-tuple ``(sample_rate, samples)`` - encoded to WAV in memory;
      * a dict with a truthy ``"data"`` entry (unpacked as such a pair), or
        otherwise a truthy ``"path"`` entry whose file is read as-is;
      * a string naming an existing file, whose bytes are read as-is.

    Raises:
        ValueError: If the value matches none of the forms above.
    """

    def encode(sample_rate, samples) -> bytes:
        # Serialise through an in-memory buffer; nothing touches disk here.
        sink = io.BytesIO()
        sf.write(sink, samples, sample_rate, format="WAV")
        return sink.getvalue()

    if isinstance(audio, tuple) and len(audio) == 2:
        rate, samples = audio  # type: ignore
        return encode(rate, samples)

    if isinstance(audio, dict):
        payload = audio.get("data")
        if payload:
            rate, samples = payload  # type: ignore
            return encode(rate, samples)
        file_ref = audio.get("path")
        if file_ref:
            return Path(file_ref).read_bytes()  # type: ignore
    elif isinstance(audio, str) and Path(audio).exists():
        return Path(audio).read_bytes()

    raise ValueError("Unrecognized audio format from Gradio component")
def load_progress(progress_file: Path) -> dict:
    """Load the per-speaker progress mapping from a JSON file.

    Args:
        progress_file: Path of the JSON progress file.

    Returns:
        The decoded mapping, or an empty dict when the file does not exist,
        does not contain valid JSON, or its top-level value is not an object.
    """
    try:
        # EAFP: open directly instead of checking exists() first.
        with progress_file.open("r", encoding="utf8") as f:
            progress = json.load(f)
    except FileNotFoundError:
        return {}
    except json.JSONDecodeError:
        print("Error decoding progress.json. Starting with an empty progress.")
        return {}

    # Callers index the result by speaker ID, so anything other than a JSON
    # object (e.g. a list or a bare number) is treated like a corrupt file.
    if not isinstance(progress, dict):
        print("progress.json does not contain a JSON object. Starting with an empty progress.")
        return {}
    return progress
def save_progress(progress_file: Path, speaker_id: str, prompt_idx: int, audio_duration: float) -> None:
    """Record one finished prompt for a speaker in the JSON progress file.

    The speaker's entry is created on first use. ``prompt_idx`` is added to
    the speaker's completed prompts (kept sorted and free of duplicates) and
    ``audio_duration`` is added to the speaker's running total, including
    when the same prompt is recorded again.

    Args:
        progress_file: Path of the JSON progress file.
        speaker_id: Key of the speaker's entry.
        prompt_idx: Index of the prompt that was just recorded.
        audio_duration: Length of the new recording in seconds.
    """
    progress = load_progress(progress_file)
    entry = progress.setdefault(
        speaker_id,
        {"completed_prompts": [], "total_duration_seconds": 0.0},
    )
    entry["completed_prompts"] = sorted({*entry.get("completed_prompts", []), prompt_idx})
    entry["total_duration_seconds"] = entry.get("total_duration_seconds", 0.0) + audio_duration

    # Write to a sibling file and swap it into place. Rewriting the real file
    # in place would leave truncated JSON behind if the process died
    # mid-write; load_progress would then return {} and the next save would
    # drop every speaker's progress.
    progress_file.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = progress_file.with_name(progress_file.name + ".tmp")
    with tmp_file.open("w") as f:
        json.dump(progress, f, indent=2)
    tmp_file.replace(progress_file)
def save_local(data: bytes, path: Path):
    """Write ``data`` to ``path``, creating missing parent directories first."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with path.open("wb") as sink:
        sink.write(data)
| # ----------------------------------------------------------------------------- | |
| # Callback | |
| # ----------------------------------------------------------------------------- | |
def record_and_save(speaker_id: str,
                    prompt_idx: int,
                    audio: AudioLike,
                    prompts: list[str]):
    """Save one recording and move on to the speaker's next open prompt.

    On success the audio is written under ``RAW_AUDIO_DIR/<speaker_id>/``, a
    row is appended to ``META_FILE`` and the speaker's entry in
    ``PROGRESS_FILE`` is updated.

    Args:
        speaker_id: ID typed by the volunteer. It is used verbatim (not
            stripped) as the progress key, matching the lookup done in
            ``update_prompt_on_speaker_change``.
        prompt_idx: Index into ``prompts`` of the sentence that was read.
        audio: Value of the Gradio Audio component; ``None`` if nothing was
            recorded.
        prompts: All prompt sentences.

    Returns:
        A 5-tuple for the outputs wired up in ``build_ui``: status text,
        next prompt text, next prompt index, progress text, duration text.
        When the submission is rejected, the current prompt and index are
        returned unchanged and the last two elements are empty strings.
    """

    def reject(message: str):
        # gr.Warning is called for its side effect (a pop-up in the UI). The
        # message is also returned as the status text so the rejection stays
        # visible, rather than passing gr.Warning's return value along.
        gr.Warning(message)
        return message, prompts[prompt_idx], prompt_idx, "", ""

    if not speaker_id.strip():
        return reject("Please enter Speaker‑ID first.")
    # speaker_id becomes a directory name and part of a file name below, so
    # refuse anything that could point outside RAW_AUDIO_DIR.
    if speaker_id in {".", ".."} or any(ch in speaker_id for ch in ("/", "\\", "\x00")):
        return reject("Speaker‑ID must not contain path separators.")
    if audio is None:
        return reject("Please record before submitting.")

    try:
        wav_bytes = audio_to_wav_bytes(audio)
    except Exception as e:  # UI boundary: report the problem, do not crash
        return reject(f"Audio processing error: {e}")

    timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    fname = f"{speaker_id}_{prompt_idx:03d}_{timestamp}.wav"
    local_audio_path = RAW_AUDIO_DIR / speaker_id / fname
    path_str = str(local_audio_path)
    save_local(wav_bytes, local_audio_path)

    META_FILE.parent.mkdir(parents=True, exist_ok=True)
    with META_FILE.open("a", newline="", encoding="utf8") as f:
        csv.writer(f).writerow([speaker_id, prompt_idx, prompts[prompt_idx], path_str])

    # Duration is best-effort: a failure here must not lose the recording
    # that has already been saved, so it is logged and counted as 0 seconds.
    try:
        # A plain string path is passed rather than a Path object.
        audio_duration = sf.info(path_str).duration
    except Exception as e:
        print(f"Error getting audio info: {e}")
        audio_duration = 0.0

    save_progress(PROGRESS_FILE, speaker_id, prompt_idx, audio_duration)

    # Re-read what was just stored so the displayed numbers match the file.
    speaker_progress = load_progress(PROGRESS_FILE).get(speaker_id, {})
    completed = speaker_progress.get("completed_prompts", [])
    completed_count = len(completed)
    total_duration = speaker_progress.get("total_duration_seconds", 0.0)

    # First prompt not yet recorded; wraps to prompt 0 once all are done.
    completed_prompts = set(completed)
    next_prompt_idx = next(
        (i for i in range(len(prompts)) if i not in completed_prompts),
        0,
    )

    return (
        f"✅ Saved to {path_str}",
        prompts[next_prompt_idx],
        next_prompt_idx,
        f"Completed: {completed_count}/{len(prompts)}",
        f"Total Duration: {total_duration:.2f} seconds",
    )
def update_prompt_on_speaker_change(speaker_id: str, prompts: list[str]) -> Tuple[str, int]:
    """Pick the prompt to show after the Speaker-ID field changes.

    A blank ID yields the first prompt. Otherwise the stored progress is
    consulted and the lowest prompt index the speaker has not completed is
    chosen; if every prompt is completed, the first prompt is chosen again.

    Returns:
        ``(prompt_text, prompt_index)``.
    """
    if speaker_id.strip() == "":
        return prompts[0], 0

    speaker_entry = load_progress(PROGRESS_FILE).get(speaker_id, {})
    done = set(speaker_entry.get("completed_prompts", []))
    resume_at = next((pos for pos in range(len(prompts)) if pos not in done), 0)
    return prompts[resume_at], resume_at
| # ----------------------------------------------------------------------------- | |
| # UI builder | |
| # ----------------------------------------------------------------------------- | |
def build_ui(prompts: list[str]):
    """Assemble the Gradio Blocks app for the recording portal.

    Components are created in display order: two instruction blocks, the
    Speaker-ID field, the sentence box, progress and duration read-outs, the
    microphone recorder, a status line and the submit button. Changing the
    Speaker-ID calls ``update_prompt_on_speaker_change``; clicking the button
    calls ``record_and_save``.

    Args:
        prompts: Sentences handed to both callbacks.

    Returns:
        The ``gr.Blocks`` instance (not yet launched).
    """
    instructions_md = (
        "## Speaking Phase\n"
        "### Record sentences for the voice‑cloning study\n"
        "1. Find a quiet space.\n"
        "2. Click the microphone, read the sentence (mistakes are alright as long as you are speaking naturally, click stop.\n"
        "3. Hit **Submit & Next**. Repeat until done."
    )
    note_md = (
        "**Note:** This is a research study. Your recordings will be used to train a voice model.\n"
        "Please enter your `Speaker-ID` before recording. Use PV username (e.g. Jane Doe = `jdoe`)."
    )

    with gr.Blocks(title="COMP5300 Voice‑Recording Portal") as demo:
        gr.Markdown(instructions_md)
        gr.Markdown(note_md)

        speaker_input = gr.Text(label="Speaker‑ID")
        sentence_box = gr.Textbox(label="Sentence to read")
        # Index of the prompt currently shown; starts at the first prompt.
        current_idx = gr.State(0)
        progress_md = gr.Markdown(label="Progress")
        duration_md = gr.Markdown(label="Total Duration")
        recorder = gr.Audio(sources=["microphone"], format="wav", label="🎙️ Record here")
        status_md = gr.Markdown()
        submit_btn = gr.Button("Submit & Next ➡️")

        # The prompt list reaches each callback through its own State input.
        speaker_input.change(
            fn=update_prompt_on_speaker_change,
            inputs=[speaker_input, gr.State(prompts)],
            outputs=[sentence_box, current_idx],
        )
        submit_btn.click(
            fn=record_and_save,
            inputs=[speaker_input, current_idx, recorder, gr.State(prompts)],
            outputs=[status_md, sentence_box, current_idx, progress_md, duration_md],
        )

    return demo
| # ----------------------------------------------------------------------------- | |
| # main | |
| # ----------------------------------------------------------------------------- | |
def main():
    """Command-line entry point: parse ``--prompts``, build the UI, launch it."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--prompts",
        type=Path,
        required=True,
        help="Text file with one sentence per line",
    )
    cli_args = parser.parse_args()

    sentences = load_prompts(cli_args.prompts)
    build_ui(sentences).launch()
if __name__ == "__main__":
    if os.environ.get("HOME") == "/home/user":
        # HOME == "/home/user" is this app's check for running inside a
        # Hugging Face Space; there, recordings go under /data.
        LOCAL_ROOT = Path("/data")
        try:
            # Creating raw/ with parents=True also creates LOCAL_ROOT itself,
            # which is the parent of progress.json and meta.csv.
            (LOCAL_ROOT / "raw").mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            # NOTE(review): presumably /data only exists when persistent
            # storage is attached to the Space - confirm. Instead of failing
            # at startup, fall back to a directory under the working
            # directory and say so in the logs.
            fallback_root = Path.cwd() / "data"
            print(
                f"Cannot use {LOCAL_ROOT} for storage ({exc}); "
                f"falling back to {fallback_root}."
            )
            LOCAL_ROOT = fallback_root
        # Rebind the module-level paths that the callbacks read at call time.
        PROGRESS_FILE = LOCAL_ROOT / "progress.json"
        META_FILE = LOCAL_ROOT / "meta.csv"
        RAW_AUDIO_DIR = LOCAL_ROOT / "raw"
        RAW_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
        prompts = load_prompts(Path("./prompts.txt"))
        ui = build_ui(prompts)
        ui.launch()
    else:
        # Running locally: take the prompts file from the command line.
        main()