Spaces:

resproj007
/

pathological_speech

Sleeping

File size: 4,685 Bytes

1c29f6f

#!/usr/bin/env python3
"""
Pathological Speech Synthesis Comparison - Parquet Version
==========================================================
Uses parquet files instead of wav files to avoid Git LFS requirements.
"""

import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd

# Speaker ID -> display label. The ID selects the per-speaker parquet file
# and the label is used as the tab title and section heading.
SPEAKERS = {
    "F04": "F04 (TORGO Dys F)",
    "M02": "M02 (TORGO Dys M)",
    "FC02": "FC02 (TORGO Hlth F)",
    "MC01": "MC01 (TORGO Hlth M)",
    "F02": "F02 (UA Dys F)",
    "M04": "M04 (UA Dys M)",
    "211": "211 (LibriSp F)",
    "4014": "4014 (LibriSp M)",
}

# Synthesis methods, listed in evaluation order. Each name is matched against
# the `method` column of the speaker data, and one player column is rendered
# per entry, left to right.
METHODS = [
    "Original",
    "Baseline_KNN",
    "Orpheus_TTS",
    "Orpheus_KNN",
    "Sesame_TTS",
    "Sesame_KNN",
    "Spark_TTS",
    "Spark_KNN",
]

def load_speaker_data(speaker_id: str) -> pd.DataFrame:
    """Read the audio-sample table for one speaker.

    The table is expected at ``<speaker_id>_audio_samples.parquet``, resolved
    relative to the current working directory.

    Args:
        speaker_id: Speaker identifier used to build the file name.

    Returns:
        The loaded DataFrame, or an empty DataFrame when the file is missing
        or cannot be read. A warning is printed in both failure cases.
    """
    source = Path(f'{speaker_id}_audio_samples.parquet')

    if source.exists():
        try:
            return pd.read_parquet(source)
        except Exception as err:
            # Best effort: a broken file must not take down the whole page.
            print(f"⚠️ Error loading parquet for {speaker_id}: {err}")
    else:
        print(f"⚠️ Parquet file not found: {source}")

    return pd.DataFrame()

def get_audio_from_parquet(
    df: pd.DataFrame, method: str, sample_num: str
) -> Tuple[Optional[int], Optional[np.ndarray]]:
    """Extract one audio clip from a speaker's sample table.

    Args:
        df: Speaker table with ``method``, ``sample_number``,
            ``audio_array`` and ``sample_rate`` columns.
        method: Value to match in the ``method`` column.
        sample_num: Value to match in the ``sample_number`` column.

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is a float32 array, or
        ``(None, None)`` when no row matches or the matching row carries no
        audio payload. If several rows match, the first one is used.
    """
    matches = df[(df['method'] == method) & (df['sample_number'] == sample_num)]

    if matches.empty:
        return None, None

    row = matches.iloc[0]
    payload = row['audio_array']

    # A null payload means "no audio" rather than a decoding error.
    if payload is None:
        return None, None

    # Samples are normally serialised as a JSON string, but a parquet column
    # may also hold the samples natively (list / array); only decode text.
    if isinstance(payload, (str, bytes, bytearray)):
        payload = json.loads(payload)

    sample_rate = int(row['sample_rate'])
    return sample_rate, np.array(payload, dtype=np.float32)

def get_transcript_info(df: pd.DataFrame, sample_num: str) -> str:
    """Look up the transcript text for a sample.

    Args:
        df: Speaker table with ``sample_number`` and ``transcript`` columns.
        sample_num: Value to match in the ``sample_number`` column.

    Returns:
        The ``transcript`` of the first matching row, or the placeholder
        ``"Sample <sample_num>"`` when no row matches.
    """
    matching_rows = df[df['sample_number'] == sample_num]

    if matching_rows.empty:
        return f"Sample {sample_num}"

    first_match = matching_rows.iloc[0]
    return first_match['transcript']

def create_speaker_interface(speaker_id: str):
    """Build the audio comparison widgets for one speaker from parquet data.

    Must be called inside an open Gradio layout context. For up to three
    samples (in sorted sample-number order) it emits a transcript heading
    followed by a row with one column per entry in ``METHODS``, each holding
    either an audio player or a "No audio" placeholder.

    Args:
        speaker_id: Key into ``SPEAKERS``; also selects the parquet file.

    Returns:
        A list of the Markdown components created at the top level (speaker
        heading and per-sample headings), or a single warning Markdown when
        no data could be loaded. The audio players and placeholders are
        created inside the row/column contexts and are not part of the list.
    """
    speaker_df = load_speaker_data(speaker_id)

    # Nothing to show: emit a single warning and stop.
    if speaker_df.empty:
        return [gr.Markdown(f"⚠️ No data available for {speaker_id}")]

    # The speaker heading comes first.
    components = [gr.Markdown(f"## {SPEAKERS[speaker_id]}")]

    # Only the first three sample numbers (after sorting) are displayed.
    available_samples = sorted(speaker_df['sample_number'].unique())
    displayed_samples = available_samples[:3]

    for sample_num in displayed_samples:
        transcript = get_transcript_info(speaker_df, sample_num)
        heading = gr.Markdown(f"### Sample {sample_num}: \"{transcript}\"")
        components.append(heading)

        # One row per sample, one equally sized column per method.
        with gr.Row():
            for method in METHODS:
                with gr.Column(scale=1):
                    sample_rate, audio_array = get_audio_from_parquet(
                        speaker_df, method, sample_num
                    )

                    if sample_rate is None or audio_array is None:
                        gr.Markdown(f"**{method}**\n*No audio*")
                        continue

                    # Gradio takes in-memory audio as (sample_rate, samples).
                    gr.Audio(
                        value=(sample_rate, audio_array),
                        label=method,
                        interactive=False,
                        show_download_button=False,
                    )

    return components

def create_parquet_interface():
    """Assemble the full parquet-backed comparison page.

    The page consists of a title block, one tab per entry in ``SPEAKERS``
    (each filled by ``create_speaker_interface``), and a footer link.

    Returns:
        The constructed ``gr.Blocks`` application, not yet launched.
    """
    soft_theme = gr.themes.Soft()

    with gr.Blocks(title="Pathological Speech Synthesis Samples", theme=soft_theme) as app:

        # Page title and one-line description
        gr.Markdown("""
        # Pathological Speech Synthesis Samples
        Synthesis methods using identical transcripts for each speaker.
        """)

        # One tab per speaker, labelled with the speaker's display name
        with gr.Tabs():
            for sid, tab_label in SPEAKERS.items():
                with gr.Tab(tab_label):
                    create_speaker_interface(sid)

        # Footer
        gr.Markdown("**Models and datasets:** https://huggingface.co/resproj007")

    return app

if __name__ == "__main__":
    # Script entry point: build the page, then serve it on port 7860 bound
    # to 0.0.0.0, with no share link and with errors shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo = create_parquet_interface()
    demo.launch(**launch_options)