Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Pathological Speech Synthesis Comparison - Parquet Version | |
| ========================================================== | |
| Uses parquet files instead of wav files to avoid Git LFS requirements. | |
| """ | |
import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd
# Speaker ID -> label shown on that speaker's tab. The ID is also the
# file-name prefix of the speaker's ``<id>_audio_samples.parquet`` file.
# Insertion order is the tab order.
SPEAKERS = dict([
    ('F04', 'F04 (TORGO Dys F)'),
    ('M02', 'M02 (TORGO Dys M)'),
    ('FC02', 'FC02 (TORGO Hlth F)'),
    ('MC01', 'MC01 (TORGO Hlth M)'),
    ('F02', 'F02 (UA Dys F)'),
    ('M04', 'M04 (UA Dys M)'),
    ('211', '211 (LibriSp F)'),
    ('4014', '4014 (LibriSp M)'),
])
# Method names in evaluation order; this is also the left-to-right column
# order of every sample row in the comparison grid. Values must match the
# ``method`` column of the parquet files exactly.
METHODS = [
    # Reference recording and baseline
    'Original',
    'Baseline_KNN',
    # One TTS/KNN pair per model
    'Orpheus_TTS',
    'Orpheus_KNN',
    'Sesame_TTS',
    'Sesame_KNN',
    'Spark_TTS',
    'Spark_KNN',
]
def load_speaker_data(speaker_id: str) -> pd.DataFrame:
    """Read the audio-sample table for one speaker.

    The file ``<speaker_id>_audio_samples.parquet`` is looked up relative to
    the current working directory.

    Args:
        speaker_id: Speaker identifier used as the file-name prefix.

    Returns:
        The parquet contents, or an empty DataFrame when the file is missing
        or cannot be read. A warning is printed in both failure cases.
    """
    source = Path(f'{speaker_id}_audio_samples.parquet')

    if source.exists():
        try:
            return pd.read_parquet(source)
        except Exception as err:
            # Deliberately broad: any read failure degrades to "no data"
            # for this speaker instead of propagating.
            print(f"⚠️ Error loading parquet for {speaker_id}: {err}")
    else:
        print(f"⚠️ Parquet file not found: {source}")

    return pd.DataFrame()
def get_audio_from_parquet(
    df: pd.DataFrame, method: str, sample_num: str
) -> Tuple[Optional[int], Optional[np.ndarray]]:
    """Extract one clip's sample rate and waveform from a speaker table.

    Args:
        df: Speaker table with ``method``, ``sample_number``,
            ``audio_array`` and ``sample_rate`` columns.
        method: Value to match in the ``method`` column.
        sample_num: Value to match in the ``sample_number`` column.

    Returns:
        ``(sample_rate, samples)`` with ``samples`` as a float32 array taken
        from the first matching row, or ``(None, None)`` when no row matches
        or the row holds no usable audio (null, scalar, or zero-length).

    Raises:
        json.JSONDecodeError: If ``audio_array`` is a string that is not
            valid JSON.
    """
    matches = df[(df['method'] == method) & (df['sample_number'] == sample_num)]
    if matches.empty:
        return None, None

    row = matches.iloc[0]
    raw_audio = row['audio_array']

    # The column is normally a JSON-encoded string, but a native list/array
    # column is accepted as well so both storage layouts work.
    if isinstance(raw_audio, (str, bytes, bytearray)):
        raw_audio = json.loads(raw_audio)
    if raw_audio is None:
        return None, None

    samples = np.array(raw_audio, dtype=np.float32)
    # A 0-d result (e.g. NaN from a null cell) or an empty array is not
    # playable; report it as missing so the caller shows "No audio".
    if samples.ndim == 0 or samples.size == 0:
        return None, None

    return int(row['sample_rate']), samples
def get_transcript_info(df: pd.DataFrame, sample_num: str) -> str:
    """Look up the transcript text for a sample number.

    Args:
        df: Speaker table with ``sample_number`` and ``transcript`` columns.
        sample_num: Value to match in the ``sample_number`` column.

    Returns:
        The ``transcript`` of the first matching row, or the placeholder
        ``"Sample <sample_num>"`` when no row matches.
    """
    matching_rows = df[df['sample_number'] == sample_num]
    if matching_rows.empty:
        return f"Sample {sample_num}"

    first_row = matching_rows.iloc[0]
    return first_row['transcript']
def create_speaker_interface(speaker_id: str, max_samples: Optional[int] = 3) -> list:
    """Render the audio comparison grid for one speaker from parquet data.

    Components are created in whatever Gradio layout context is active when
    this is called (``create_parquet_interface`` calls it inside a tab).

    Args:
        speaker_id: Speaker whose parquet file is loaded; also used to look
            up the heading label in ``SPEAKERS``.
        max_samples: Upper bound on how many sample rows are shown, taken
            from the sorted sample numbers. ``None`` shows every sample.

    Returns:
        The Markdown and Audio components created, in creation order. When
        the speaker has no data this is a single warning Markdown.
    """
    df = load_speaker_data(speaker_id)
    if df.empty:
        return [gr.Markdown(f"⚠️ No data available for {speaker_id}")]

    # Fall back to the raw ID so a speaker that has data but no entry in
    # SPEAKERS still renders instead of raising KeyError.
    heading = SPEAKERS.get(speaker_id, speaker_id)
    components = [gr.Markdown(f"## {heading}")]

    sample_numbers = sorted(df['sample_number'].unique())

    for sample_num in sample_numbers[:max_samples]:
        transcript = get_transcript_info(df, sample_num)
        components.append(gr.Markdown(f"### Sample {sample_num}: \"{transcript}\""))

        # One row per sample, one equal-width column per synthesis method.
        with gr.Row():
            for method in METHODS:
                with gr.Column(scale=1):
                    sample_rate, audio_array = get_audio_from_parquet(df, method, sample_num)

                    if sample_rate is None or audio_array is None:
                        components.append(gr.Markdown(f"**{method}**\n*No audio*"))
                        continue

                    # Gradio expects a (sample_rate, audio_array) tuple.
                    components.append(
                        gr.Audio(
                            value=(sample_rate, audio_array),
                            label=method,
                            interactive=False,
                            show_download_button=False,
                        )
                    )

    return components
def create_parquet_interface():
    """Assemble the full app: title, one tab per speaker, and a footer.

    Returns:
        The constructed ``gr.Blocks`` instance; launching is left to the
        caller.
    """
    demo = gr.Blocks(
        title="Pathological Speech Synthesis Samples",
        theme=gr.themes.Soft(),
    )

    with demo:
        # Page title and one-line description.
        gr.Markdown("""
        # Pathological Speech Synthesis Samples
        Synthesis methods using identical transcripts for each speaker.
        """)

        # One tab per entry in SPEAKERS, in dictionary order.
        with gr.Tabs():
            for tab_speaker, tab_label in SPEAKERS.items():
                with gr.Tab(tab_label):
                    create_speaker_interface(tab_speaker)

        # Footer link.
        gr.Markdown("**Models and datasets:** https://huggingface.co/resproj007")

    return demo
if __name__ == "__main__":
    # Serve on all interfaces at port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo = create_parquet_interface()
    demo.launch(**launch_options)