# tts-evaluation / app.py
# asuni's picture
# Upload app.py
# 367255e verified
import gradio as gr
import os
import csv
import fcntl
from datetime import datetime
# --- Start of Local Mode Implementation ---
# Local mode is opt-in through the GRADIO_LOCAL_MODE environment variable, e.g.
#   GRADIO_LOCAL_MODE=true python your_script.py
# Only the values "true" and "1" (case-insensitive) switch it on.
IS_LOCAL_MODE = os.environ.get("GRADIO_LOCAL_MODE", "false").lower() in ["true", "1"]

# Both handles stay None unless the optional Hugging Face imports below succeed.
create_repo = None
Dataset = None

if IS_LOCAL_MODE:
    print("Running in LOCAL mode. Hugging Face functionalities are disabled.")
else:
    # Optional: Hugging Face dataset push
    try:
        from huggingface_hub import create_repo
        from datasets import Dataset
    except ImportError:
        # Reset both names: the first import may have succeeded before the
        # second one failed.
        create_repo = None
        Dataset = None
        print("Hugging Face libraries not found. HF push functionality will be disabled.")
    else:
        print("Hugging Face libraries found. HF push functionality is available.")
# --- End of Local Mode Implementation ---
# Configuration
SAMPLES_DIR = "sample-audios"  # folder holding the audio samples to rate
OUTPUT_CSV = "responses.csv"  # CSV file the ratings are appended to

# Rubric text: criterion name -> {score (5 = best ... 1 = worst) -> description}.
CRITERIA_EXPLANATIONS: dict[str, dict[int, str]] = {
    "Clarity & Intelligibility": {
        5: "Speech is clear, easy to understand (at all speeds).",
        4: "Mostly clear, minor issues (with fast/slow playback).",
        3: "Understandable but requires effort; some words unclear.",
        2: "Often unclear or distorted; difficult to follow.",
        1: "Unacceptable.",
    },
    "Accent & Pronunciation": {
        5: "Pronunciation is natural and appropriate for the target dialect.",
        4: "Minor pronunciation quirks but overall fine.",
        3: "Some mispronunciations that require effort to interpret.",
        2: "Frequent pronunciation issues that impede understanding.",
        1: "Severe pronunciation problems; largely unintelligible.",
    },
    "Tone & Suitability": {
        5: "Tone fits the content and use-case perfectly.",
        4: "Generally appropriate tone with small mismatches.",
        3: "Tone is acceptable but occasionally inappropriate.",
        2: "Tone often feels off or distracting from the content.",
        1: "Tone is inappropriate or harmful for the content.",
    },
    "Voice quality": {
        5: "Natural, pleasant voice with no artifacts.",
        4: "Minor artifacts but overall high quality.",
        3: "Noticeable quality issues but still usable.",
        2: "Poor quality with frequent artifacts.",
        1: "Unusable voice quality.",
    },
    "Customization & Flexibility": {
        5: "Highly flexible and customizable for different styles.",
        4: "Some customization available; works well for most cases.",
        3: "Limited customization; acceptable for simple use-cases.",
        2: "Very limited or brittle customization options.",
        1: "No useful customization; inflexible.",
    },
    "Listening comfort": {
        5: "Comfortable to listen to for extended periods.",
        4: "Mostly comfortable with occasional sharpness or fatigue.",
        3: "Some listening fatigue; tolerable for short durations.",
        2: "Often fatiguing or distracting to listen to.",
        1: "Uncomfortable or painful to listen to.",
    },
}
def list_samples(samples_dir=None):
    """Return the audio file names found in the samples folder, sorted.

    Args:
        samples_dir: Folder to scan. ``None`` (the default) means the
            module-level ``SAMPLES_DIR``.

    Returns:
        Bare file names (no directory part) of the regular files whose
        extension is .wav, .mp3, .ogg or .flac (case-insensitive), in
        ascending order. An empty list when the folder does not exist or
        the path is not a directory.
    """
    directory = SAMPLES_DIR if samples_dir is None else samples_dir
    audio_suffixes = ('.wav', '.mp3', '.ogg', '.flac')
    try:
        entries = os.listdir(directory)
    except (FileNotFoundError, NotADirectoryError):
        # Listing directly (instead of checking isdir first) avoids a crash
        # if the folder disappears between the check and the listing.
        return []
    # Keep regular files only: a sub-folder named like "x.wav" is not a sample.
    return sorted(
        name
        for name in entries
        if name.lower().endswith(audio_suffixes)
        and os.path.isfile(os.path.join(directory, name))
    )
def save_response(sample, system_path, annotator, clarity, accent, tone, voice_quality, customization, comfort, comment, session_id=None, user_email=None):
    """Append one rating row to ``OUTPUT_CSV`` and, outside local mode, push it to the Hub.

    Args:
        sample: File name of the rated sample.
        system_path: Path of the system output that was rated.
        annotator: Annotator identifier as typed by the user.
        clarity, accent, tone, voice_quality, customization, comfort:
            The six scores.
        comment: Free-text comment.
        session_id: Optional session identifier; stored as "" when falsy.
        user_email: Optional e-mail address; stored as "" when falsy.

    Returns:
        ``{"status": "saved", "sample": sample, "hf": hf_result}`` where
        ``hf_result`` is ``None`` in local mode, otherwise the dict returned
        by ``save_responses_to_hf`` or ``{"status": "hf_error", ...}`` if
        that call raised.

    Raises:
        OSError: If the CSV file or its parent folder cannot be created or
            written.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    header = [
        "timestamp",
        "sample",
        "system_path",
        "annotator",
        "session_id",
        "user_email",
        "clarity",
        "accent",
        "tone",
        "voice_quality",
        "customization",
        "comfort",
        "comment",
    ]
    # Naive UTC timestamp: same text format as the deprecated
    # ``datetime.utcnow().isoformat()``, without the deprecation warning.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    row = [
        timestamp,
        sample,
        system_path,
        annotator,
        session_id or "",
        user_email or "",
        clarity,
        accent,
        tone,
        voice_quality,
        customization,
        comfort,
        comment,
    ]

    os.makedirs(os.path.dirname(OUTPUT_CSV) or '.', exist_ok=True)
    with open(OUTPUT_CSV, "a", newline='', encoding='utf-8') as f:
        locked = False
        try:
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            locked = True
        except OSError:
            # Advisory locking is best-effort; still write the row without it.
            pass
        try:
            writer = csv.writer(f)
            # Decide on the header while holding the lock, from the real file
            # size: a check made before locking lets two writers both emit a
            # header, and an existing-but-empty file would never get one.
            if os.fstat(f.fileno()).st_size == 0:
                writer.writerow(header)
            writer.writerow(row)
            # Flush before unlocking, otherwise the buffered row only reaches
            # the file at close time, after the lock has been released.
            f.flush()
        finally:
            if locked:
                try:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                except OSError:
                    pass  # closing the file releases the lock anyway

    hf_result = None
    if not IS_LOCAL_MODE:
        try:
            hf_result = save_responses_to_hf([dict(zip(header, row))])
        except Exception as e:
            # The local row is already saved; report the Hub failure instead
            # of failing the whole submission.
            hf_result = {"status": "hf_error", "error": str(e)}
    return {"status": "saved", "sample": sample, "hf": hf_result}
def save_responses_to_hf(rows, repo_id: str | None = None, token: str | None = None):
    """Push ``rows`` to a private Hugging Face dataset repository.

    Args:
        rows: List of record dicts, handed to ``Dataset.from_list``.
        repo_id: Target dataset repo; falls back to the ``HF_DATASET_ID``
            environment variable when falsy.
        token: Access token; falls back to the ``HF_TOKEN`` environment
            variable when falsy.

    Returns:
        A status dict whose ``"status"`` is one of ``"hf_unavailable"``
        (libraries not loaded), ``"hf_skipped"`` (no token or repo id),
        ``"hf_push_error"`` or ``"hf_pushed"``. The last two also carry
        ``"repo_error"``: the text of a repo-creation failure, or ``None``.
    """
    hub_ready = create_repo is not None and Dataset is not None
    if not hub_ready:
        return {"status": "hf_unavailable", "reason": "missing_packages_or_local_mode"}

    # Explicit arguments win; otherwise read the environment.
    if not token:
        token = os.environ.get("HF_TOKEN")
    if not repo_id:
        repo_id = os.environ.get("HF_DATASET_ID")
    if not (token and repo_id):
        return {"status": "hf_skipped", "reason": "missing_token_or_repo_env"}

    # Repo creation is best-effort: a failure is recorded and the push is
    # still attempted.
    repo_err = None
    try:
        create_repo(repo_id=repo_id, repo_type="dataset", token=token, private=True, exist_ok=True)
    except Exception as exc:
        repo_err = str(exc)

    # NOTE(review): only ``rows`` is uploaded on each call. If push_to_hub
    # replaces the existing split, earlier pushes are lost - confirm against
    # the datasets documentation.
    dataset = Dataset.from_list(rows)
    try:
        dataset.push_to_hub(repo_id, token=token)
    except Exception as exc:
        return {"status": "hf_push_error", "error": str(exc), "repo_error": repo_err}

    return {"status": "hf_pushed", "rows": len(rows), "repo": repo_id, "repo_error": repo_err}
def make_ui():
    """Build the Gradio Blocks app used to rate TTS samples.

    The page shows one sample at a time (a reference clip and the matching
    system output), six 1-5 sliders, a comment box and a "Save & Next"
    button. Each click stores the ratings through ``save_response`` and
    moves to the next sample; past the last sample the rating widgets are
    hidden. A separate button offers ``OUTPUT_CSV`` for download.

    Returns:
        The assembled ``gr.Blocks`` instance (not launched).
    """
    criteria = [
        "Clarity & Intelligibility",
        "Accent & Pronunciation",
        "Tone & Suitability",
        "Voice quality",
        "Customization & Flexibility",
        "Listening comfort",
    ]
    default_score = 3
    default_hint = "Select a score to see its meaning."

    def system_output_path(sample):
        # The system output shares the sample's file name under this folder.
        return os.path.join("system-outputs", "system_a", sample)

    def make_explainer_fn(crit):
        """Return a callback mapping a slider value to the rubric text of ``crit``."""
        mapping = CRITERIA_EXPLANATIONS.get(crit, {})

        def expl(val):
            try:
                score = int(val)
            except (ValueError, TypeError):
                # No usable score: show the neutral hint, not "(None/5)".
                return default_hint
            text = mapping.get(score)
            if text is None:
                return default_hint
            return f"**{crit} ({score}/5):** {text}"

        return expl

    with gr.Blocks() as demo:
        samples_list = gr.State(list_samples())
        current_index = gr.State(0)

        gr.Markdown("# TTS Rubric — Compact Evaluation")

        with gr.Accordion("Scoring guide & Annotator Info", open=False):
            with gr.Row():
                annotator_global = gr.Textbox(label="Annotator ID", lines=1, scale=1)
                session_id_global = gr.Textbox(label="Session ID", lines=1, scale=1)
                user_email_global = gr.Textbox(label="User email", lines=1, scale=1)
            # Render the whole rubric as Markdown, best score first.
            guide_lines = []
            for crit, levels in CRITERIA_EXPLANATIONS.items():
                guide_lines.append(f"### {crit}")
                guide_lines.extend(
                    f"- **{score} points**: {levels[score]}"
                    for score in sorted(levels, reverse=True)
                )
                guide_lines.append("")
            gr.Markdown("\n".join(guide_lines))

        progress_md = gr.Markdown("Sample 1 of X")

        # Main evaluation layout
        with gr.Row(equal_height=True):
            # Left column: audio players, save button and status line
            with gr.Column(scale=1):
                sample_name_md = gr.Markdown("### Sample Filename")
                reference_audio = gr.Audio(label="Reference audio")
                system_audio = gr.Audio(label="Evaluation output")
                reference_missing_md = gr.Markdown("(reference audio missing)", visible=False)
                system_missing_md = gr.Markdown("(system output missing)", visible=False)
                submit_btn = gr.Button("Save & Next", variant="primary", scale=1)
                status = gr.Textbox(label="Status", interactive=False, scale=2)
            # Right column: all evaluation controls
            with gr.Column(scale=1):
                # One slider per criterion, stacked in a single column
                with gr.Column():
                    clarity, accent, tone, voice_quality, customization, comfort = (
                        gr.Slider(minimum=1, maximum=5, step=1, label=label, value=default_score)
                        for label in criteria
                    )
                # Single explanation box shared by all sliders
                slider_explanation_md = gr.Markdown(default_hint)
                comment = gr.Textbox(label="Comments (optional)", lines=2, value="")

        with gr.Row():
            export_btn = gr.Button("Export responses to CSV")
            export_file = gr.File(label="Download responses.csv", interactive=False)

        # --- LOGIC & EVENTS ---
        all_sliders = [clarity, accent, tone, voice_quality, customization, comfort]
        # Every component refreshed when a sample is loaded; also the output
        # order of the load and click events below.
        ui_elements = [
            progress_md, sample_name_md, reference_audio, reference_missing_md,
            system_audio, system_missing_md, clarity, accent, tone, voice_quality,
            customization, comfort, slider_explanation_md, comment, submit_btn, status
        ]

        def load_sample(samples, index):
            """Return a component -> update dict showing sample ``index``."""
            total_samples = len(samples)
            if index >= total_samples:
                # End of the session: hide everything except progress and status.
                updates = {component: gr.update(visible=False) for component in ui_elements}
                updates[progress_md] = gr.update(
                    value=f"**All {total_samples} samples completed!** You can close this window."
                )
                updates[status] = gr.update(value="Finished.")
                return updates

            sample = samples[index]
            sample_path = os.path.join(SAMPLES_DIR, sample)
            sys_path = system_output_path(sample)
            ref_exists = os.path.exists(sample_path)
            sys_exists = os.path.exists(sys_path)

            updates = {
                progress_md: gr.update(value=f"Sample **{index + 1}** of **{total_samples}**"),
                sample_name_md: gr.update(value=f"### {sample}", visible=True),
                # A missing file hides the player and shows its placeholder.
                reference_audio: gr.update(value=sample_path if ref_exists else None, visible=ref_exists),
                reference_missing_md: gr.update(visible=not ref_exists),
                system_audio: gr.update(value=sys_path if sys_exists else None, visible=sys_exists),
                system_missing_md: gr.update(visible=not sys_exists),
                slider_explanation_md: gr.update(value=default_hint),
                comment: gr.update(value=""),
                submit_btn: gr.update(visible=True),
                status: gr.update(value="Ready."),
            }
            # Reset every score to the default for the new sample.
            for slider in all_sliders:
                updates[slider] = gr.update(value=default_score)
            return updates

        def save_and_next(index, samples, annotator, sid, email, cl, ac, to, vq, cu, co, comm):
            """Save the current ratings and return outputs for the next sample."""
            if index < len(samples):
                sample = samples[index]
                save_status = save_response(
                    sample, system_output_path(sample), annotator,
                    cl, ac, to, vq, cu, co, comm,
                    session_id=sid, user_email=email,
                )
                next_index = index + 1
                updates = load_sample(samples, next_index)
                updates[status] = gr.update(value=str(save_status['status']))
            else:
                # Nothing left to rate: just show the end-of-session state.
                next_index = index
                updates = load_sample(samples, index)
            # Order the values by ui_elements explicitly; relying on the
            # dict's insertion order would silently mis-route updates.
            return [next_index, *(updates[component] for component in ui_elements)]

        # Wire every slider to the single explanation box
        for label, slider in zip(criteria, all_sliders):
            slider.change(make_explainer_fn(label), inputs=[slider], outputs=[slider_explanation_md])

        # Initial load
        demo.load(load_sample, inputs=[samples_list, current_index], outputs=ui_elements)

        # Button click event
        submit_btn.click(
            save_and_next,
            inputs=[current_index, samples_list, annotator_global, session_id_global, user_email_global, clarity, accent, tone, voice_quality, customization, comfort, comment],
            outputs=[current_index, *ui_elements],
        )

        # Offer the CSV for download only once it exists
        export_btn.click(lambda: OUTPUT_CSV if os.path.exists(OUTPUT_CSV) else None, inputs=[], outputs=[export_file])

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on 0.0.0.0, port 7860.
    app = make_ui()
    app.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )