resproj007's picture
Create app.py
1c29f6f verified
#!/usr/bin/env python3
"""
Pathological Speech Synthesis Comparison - Parquet Version
==========================================================
Uses parquet files instead of wav files to avoid Git LFS requirements.
"""
import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import gradio as gr
import numpy as np
import pandas as pd
# Speaker ID -> label shown on that speaker's tab.
# Tabs are created by iterating this mapping, so insertion order is tab order.
SPEAKERS = {
    # TORGO speakers
    "F04": "F04 (TORGO Dys F)",
    "M02": "M02 (TORGO Dys M)",
    "FC02": "FC02 (TORGO Hlth F)",
    "MC01": "MC01 (TORGO Hlth M)",
    # UA speakers
    "F02": "F02 (UA Dys F)",
    "M04": "M04 (UA Dys M)",
    # LibriSp speakers
    "211": "211 (LibriSp F)",
    "4014": "4014 (LibriSp M)",
}
# Synthesis methods, listed in evaluation order. Each sample row gets one
# column per entry, rendered left to right in this order.
METHODS = [
    "Original", "Baseline_KNN",
    "Orpheus_TTS", "Orpheus_KNN",
    "Sesame_TTS", "Sesame_KNN",
    "Spark_TTS", "Spark_KNN",
]
def load_speaker_data(
    speaker_id: str,
    data_dir: Optional[Union[str, Path]] = None,
) -> pd.DataFrame:
    """Load the parquet table of audio samples for one speaker.

    The file is expected to be named ``<speaker_id>_audio_samples.parquet``.
    Loading is best-effort: any problem is reported on stdout and an empty
    DataFrame is returned, so callers can test ``df.empty`` instead of
    handling exceptions.

    Args:
        speaker_id: Speaker identifier used as the file-name prefix.
        data_dir: Directory that holds the parquet files. ``None`` (the
            default) resolves the file relative to the current working
            directory, which is the original behaviour.

    Returns:
        The DataFrame read from the parquet file, or an empty DataFrame when
        the file does not exist or cannot be read.
    """
    file_name = f'{speaker_id}_audio_samples.parquet'
    parquet_file = Path(file_name) if data_dir is None else Path(data_dir) / file_name

    if not parquet_file.exists():
        print(f"⚠️ Parquet file not found: {parquet_file}")
        return pd.DataFrame()

    try:
        return pd.read_parquet(parquet_file)
    except Exception as e:
        # Deliberate catch-all: a corrupt file or a missing parquet engine
        # should degrade to "no data" for this speaker, not crash the app.
        print(f"⚠️ Error loading parquet for {speaker_id}: {e}")
        return pd.DataFrame()
def get_audio_from_parquet(
    df: pd.DataFrame, method: str, sample_num: str
) -> Tuple[Optional[int], Optional[np.ndarray]]:
    """Extract one audio clip from a speaker's sample table.

    Args:
        df: Table with ``method``, ``sample_number``, ``audio_array`` and
            ``sample_rate`` columns.
        method: Value to match against the ``method`` column.
        sample_num: Value to match against the ``sample_number`` column.

    Returns:
        ``(sample_rate, audio)`` for the first matching row, where ``audio``
        is a float32 NumPy array, or ``(None, None)`` when ``df`` is empty or
        no row matches.
    """
    # An empty frame may have no columns at all (e.g. a failed load), so
    # bail out before indexing by column name.
    if df.empty:
        return None, None

    matches = df[(df['method'] == method) & (df['sample_number'] == sample_num)]
    if matches.empty:
        return None, None

    row = matches.iloc[0]

    # The samples are stored as a JSON-encoded string; also accept a cell
    # that already holds a decoded sequence (list / array).
    raw_audio = row['audio_array']
    if isinstance(raw_audio, (str, bytes, bytearray)):
        raw_audio = json.loads(raw_audio)

    sample_rate = int(row['sample_rate'])
    return sample_rate, np.array(raw_audio, dtype=np.float32)
def get_transcript_info(df: pd.DataFrame, sample_num: str) -> str:
    """Return the transcript text for a sample number.

    Args:
        df: Table with ``sample_number`` and ``transcript`` columns.
        sample_num: Value to match against the ``sample_number`` column.

    Returns:
        The first non-missing transcript among the rows for ``sample_num``,
        as a string. Falls back to ``"Sample <sample_num>"`` when ``df`` is
        empty, no row matches, or every matching transcript is missing
        (None/NaN).
    """
    fallback = f"Sample {sample_num}"

    # An empty frame may have no columns at all, so do not index into it.
    if df.empty:
        return fallback

    rows = df[df['sample_number'] == sample_num]
    if rows.empty:
        return fallback

    # Skip missing values so a NaN never leaks out as the "transcript".
    transcripts = rows['transcript'].dropna()
    if transcripts.empty:
        return fallback
    return str(transcripts.iloc[0])
def create_speaker_interface(speaker_id: str, max_samples: int = 3):
    """Render the audio comparison grid for one speaker from parquet data.

    Components are created in whatever Gradio layout context is active when
    this is called (the app calls it from inside a ``gr.Tab``). For each
    sample there is a header with the transcript, followed by one row holding
    a column per entry in ``METHODS``: an audio player when a clip exists,
    otherwise a "No audio" note.

    Args:
        speaker_id: Speaker whose parquet file is loaded.
        max_samples: Maximum number of samples to show, taken from the sorted
            sample numbers. Defaults to 3; negative values are treated as 0.

    Returns:
        A list of the header ``gr.Markdown`` components that were created
        (or a single warning component when the speaker has no data). The
        per-method audio/placeholder components are rendered but not
        included in the list.
    """
    df = load_speaker_data(speaker_id)
    if df.empty:
        return [gr.Markdown(f"⚠️ No data available for {speaker_id}")]

    components = []

    # Fall back to the raw ID so a speaker with data but no entry in
    # SPEAKERS still renders instead of raising KeyError.
    speaker_label = SPEAKERS.get(speaker_id, speaker_id)
    components.append(gr.Markdown(f"## {speaker_label}"))

    sample_numbers = sorted(df['sample_number'].unique())
    sample_limit = max(0, max_samples)

    for sample_num in sample_numbers[:sample_limit]:
        transcript = get_transcript_info(df, sample_num)
        components.append(gr.Markdown(f"### Sample {sample_num}: \"{transcript}\""))

        with gr.Row():
            for method in METHODS:
                with gr.Column(scale=1):
                    sample_rate, audio_array = get_audio_from_parquet(df, method, sample_num)
                    if sample_rate is not None and audio_array is not None:
                        # gr.Audio takes its value as a
                        # (sample_rate, audio_array) tuple.
                        gr.Audio(
                            value=(sample_rate, audio_array),
                            label=method,
                            interactive=False,
                            show_download_button=False,
                        )
                    else:
                        gr.Markdown(f"**{method}**\n*No audio*")

    return components
def create_parquet_interface():
    """Assemble the parquet-backed comparison app.

    Builds a ``gr.Blocks`` page with a title, one tab per entry in
    ``SPEAKERS`` (each filled by ``create_speaker_interface``), and a footer
    link.

    Returns:
        The constructed ``gr.Blocks`` object, not yet launched.
    """
    intro_text = (
        "\n"
        "# Pathological Speech Synthesis Samples\n"
        "Synthesis methods using identical transcripts for each speaker.\n"
    )
    footer_text = "**Models and datasets:** https://huggingface.co/resproj007"

    with gr.Blocks(title="Pathological Speech Synthesis Samples", theme=gr.themes.Soft()) as app:
        # Page title
        gr.Markdown(intro_text)

        # One tab per speaker, in SPEAKERS order
        with gr.Tabs():
            for speaker_id in SPEAKERS:
                tab_label = SPEAKERS[speaker_id]
                with gr.Tab(tab_label):
                    create_speaker_interface(speaker_id)

        # Page footer
        gr.Markdown(footer_text)

    return app
if __name__ == "__main__":
    # Script entry point: build the app, then serve it bound to 0.0.0.0 on
    # port 7860 with sharing disabled and show_error enabled.
    demo = create_parquet_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)