Spaces:

resproj007
/

pathological_speech

Sleeping

App Files Files Community

pathological_speech / app.py

resproj007

Create app.py

1c29f6f verified 3 months ago

raw

history blame contribute delete

4.69 kB

	#!/usr/bin/env python3
	"""
	Pathological Speech Synthesis Comparison - Parquet Version
	==========================================================
	Uses parquet files instead of wav files to avoid Git LFS requirements.
	"""

	import json
	from pathlib import Path
	from typing import Dict, List, Optional, Tuple

	import gradio as gr
	import numpy as np
	import pandas as pd

	# Tab label for every speaker, keyed by speaker ID.  Each label has the form
	# "<ID> (<short description>)"; insertion order is the order listed here.
	SPEAKERS = {
	    speaker_id: f'{speaker_id} ({details})'
	    for speaker_id, details in (
	        ('F04', 'TORGO Dys F'),
	        ('M02', 'TORGO Dys M'),
	        ('FC02', 'TORGO Hlth F'),
	        ('MC01', 'TORGO Hlth M'),
	        ('F02', 'UA Dys F'),
	        ('M04', 'UA Dys M'),
	        ('211', 'LibriSp F'),
	        ('4014', 'LibriSp M'),
	    )
	}

	# Methods in evaluation order: the two standalone entries first, then the
	# TTS and KNN variant of each model.
	METHODS = ['Original', 'Baseline_KNN'] + [
	    f'{model}_{variant}'
	    for model in ('Orpheus', 'Sesame', 'Spark')
	    for variant in ('TTS', 'KNN')
	]

	def load_speaker_data(speaker_id: str) -> pd.DataFrame:
	    """Read ``<speaker_id>_audio_samples.parquet`` from the working directory.

	    Never raises for a missing or unreadable file: a warning is printed and
	    an empty DataFrame is returned instead, so callers can test ``.empty``.
	    """
	    source = Path(f'{speaker_id}_audio_samples.parquet')

	    if source.exists():
	        try:
	            return pd.read_parquet(source)
	        except Exception as err:
	            # Best effort: report the problem and fall through to the
	            # empty-frame result below.
	            print(f"⚠️ Error loading parquet for {speaker_id}: {err}")
	    else:
	        print(f"⚠️ Parquet file not found: {source}")

	    return pd.DataFrame()

	def get_audio_from_parquet(
	    df: pd.DataFrame, method: str, sample_num: str
	) -> Tuple[Optional[int], Optional[np.ndarray]]:
	    """Return the audio clip stored for one method/sample pair.

	    Args:
	        df: Speaker table with 'method', 'sample_number', 'audio_array' and
	            'sample_rate' columns.
	        method: Value to match in the 'method' column.
	        sample_num: Value to match in the 'sample_number' column.

	    Returns:
	        ``(sample_rate, samples)`` where ``samples`` is a float32 array, or
	        ``(None, None)`` when no row matches.  If several rows match, the
	        first one is used.
	    """
	    matches = df[(df['method'] == method) & (df['sample_number'] == sample_num)]
	    if matches.empty:
	        return None, None

	    row = matches.iloc[0]
	    raw_audio = row['audio_array']
	    # The samples are normally a JSON-encoded string.  Only decode in that
	    # case, so a cell that already holds a decoded sequence is accepted
	    # instead of failing inside json.loads.
	    if isinstance(raw_audio, (str, bytes, bytearray)):
	        raw_audio = json.loads(raw_audio)

	    return int(row['sample_rate']), np.array(raw_audio, dtype=np.float32)

	def get_transcript_info(df: pd.DataFrame, sample_num: str) -> str:
	    """Look up the transcript recorded for ``sample_num``.

	    The first row whose 'sample_number' equals ``sample_num`` supplies the
	    transcript; when no row matches, the placeholder ``"Sample <sample_num>"``
	    is returned instead.
	    """
	    matching_rows = df.loc[df['sample_number'] == sample_num]
	    if matching_rows.empty:
	        return f"Sample {sample_num}"
	    first_match = matching_rows.iloc[0]
	    return first_match['transcript']

	def create_speaker_interface(speaker_id: str):
	    """Build the audio comparison widgets for one speaker from its parquet data.

	    Creates a header, then for each of the first three sample numbers (in
	    sorted order) a transcript heading followed by a row with one column per
	    entry in METHODS.  Each column holds a read-only audio player, or a
	    "No audio" note when that method has no clip for the sample.

	    Returns:
	        A list of the Markdown components created here (header and sample
	        headings).  The widgets created inside the rows are not included.
	        When the speaker has no data, the list holds a single warning
	        Markdown component.
	    """
	    speaker_df = load_speaker_data(speaker_id)
	    if speaker_df.empty:
	        warning = gr.Markdown(f"⚠️ No data available for {speaker_id}")
	        return [warning]

	    header = gr.Markdown(f"## {SPEAKERS[speaker_id]}")
	    components = [header]

	    # Cap the display at the first three sample numbers.
	    shown_samples = sorted(speaker_df['sample_number'].unique())[:3]

	    for sample_num in shown_samples:
	        transcript = get_transcript_info(speaker_df, sample_num)
	        heading = gr.Markdown(f"### Sample {sample_num}: \"{transcript}\"")
	        components.append(heading)

	        # One row per sample, one equally weighted column per method.
	        with gr.Row():
	            for method in METHODS:
	                with gr.Column(scale=1):
	                    sample_rate, audio_array = get_audio_from_parquet(
	                        speaker_df, method, sample_num
	                    )
	                    if sample_rate is None or audio_array is None:
	                        gr.Markdown(f"{method}\nNo audio")
	                    else:
	                        # The audio value is passed as a
	                        # (sample_rate, audio_array) tuple.
	                        gr.Audio(
	                            value=(sample_rate, audio_array),
	                            label=method,
	                            interactive=False,
	                            show_download_button=False,
	                        )

	    return components

	def create_parquet_interface():
	    """Assemble the full comparison page and return the Blocks object.

	    The page consists of a title section, one tab per entry in SPEAKERS
	    (each filled by create_speaker_interface), and a footer link.
	    """
	    # The interior lines of this literal are kept exactly as they appear in
	    # the file, so the text handed to gr.Markdown is unchanged.
	    intro_markdown = """
	# Pathological Speech Synthesis Samples
	Synthesis methods using identical transcripts for each speaker.
	"""
	    page_theme = gr.themes.Soft()

	    with gr.Blocks(title="Pathological Speech Synthesis Samples", theme=page_theme) as demo:
	        # Title section
	        gr.Markdown(intro_markdown)

	        # One tab per speaker, in SPEAKERS order
	        with gr.Tabs():
	            for speaker_id, tab_label in SPEAKERS.items():
	                with gr.Tab(tab_label):
	                    create_speaker_interface(speaker_id)

	        # Footer
	        gr.Markdown("Models and datasets: https://huggingface.co/resproj007")

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the interface, then start the server with
	    # the fixed host/port settings below.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "show_error": True,
	    }
	    demo = create_parquet_interface()
	    demo.launch(**launch_options)