Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import csv | |
| import fcntl | |
| from datetime import datetime | |
# --- Start of Local Mode Implementation ---
# Local-only mode is switched on through the environment, e.g.:
#     GRADIO_LOCAL_MODE=true python your_script.py
# Accepted truthy spellings are "true" (any case) and "1".
IS_LOCAL_MODE = os.environ.get("GRADIO_LOCAL_MODE", "false").lower() in {"true", "1"}

# Both names stay None whenever pushing to the Hugging Face Hub is not possible;
# downstream code checks for None before attempting a push.
create_repo = None
Dataset = None

if IS_LOCAL_MODE:
    print("Running in LOCAL mode. Hugging Face functionalities are disabled.")
else:
    # Optional dependency: only needed for the Hugging Face dataset push.
    try:
        from huggingface_hub import create_repo
        from datasets import Dataset
        print("Hugging Face libraries found. HF push functionality is available.")
    except ImportError:
        print("Hugging Face libraries not found. HF push functionality will be disabled.")
        # Reset both: the first import may have succeeded before the second failed.
        create_repo = None
        Dataset = None
# --- End of Local Mode Implementation ---
# Configuration
# Directory that holds the reference audio samples to be rated.
SAMPLES_DIR = "sample-audios"
# CSV file that collected ratings are appended to.
OUTPUT_CSV = "responses.csv"

# Rubric text: criterion name -> {score (5 = best ... 1 = worst) -> meaning}.
# The criterion names double as slider labels in the UI, and the insertion
# order here is the order in which the scoring guide is rendered.
CRITERIA_EXPLANATIONS: dict[str, dict[int, str]] = {
    "Clarity & Intelligibility": {
        5: "Speech is clear, easy to understand (at all speeds).",
        4: "Mostly clear, minor issues (with fast/slow playback).",
        3: "Understandable but requires effort; some words unclear.",
        2: "Often unclear or distorted; difficult to follow.",
        1: "Unacceptable.",
    },
    "Accent & Pronunciation": {
        5: "Pronunciation is natural and appropriate for the target dialect.",
        4: "Minor pronunciation quirks but overall fine.",
        3: "Some mispronunciations that require effort to interpret.",
        2: "Frequent pronunciation issues that impede understanding.",
        1: "Severe pronunciation problems; largely unintelligible.",
    },
    "Tone & Suitability": {
        5: "Tone fits the content and use-case perfectly.",
        4: "Generally appropriate tone with small mismatches.",
        3: "Tone is acceptable but occasionally inappropriate.",
        2: "Tone often feels off or distracting from the content.",
        1: "Tone is inappropriate or harmful for the content.",
    },
    "Voice quality": {
        5: "Natural, pleasant voice with no artifacts.",
        4: "Minor artifacts but overall high quality.",
        3: "Noticeable quality issues but still usable.",
        2: "Poor quality with frequent artifacts.",
        1: "Unusable voice quality.",
    },
    "Customization & Flexibility": {
        5: "Highly flexible and customizable for different styles.",
        4: "Some customization available; works well for most cases.",
        3: "Limited customization; acceptable for simple use-cases.",
        2: "Very limited or brittle customization options.",
        1: "No useful customization; inflexible.",
    },
    "Listening comfort": {
        5: "Comfortable to listen to for extended periods.",
        4: "Mostly comfortable with occasional sharpness or fatigue.",
        3: "Some listening fatigue; tolerable for short durations.",
        2: "Often fatiguing or distracting to listen to.",
        1: "Uncomfortable or painful to listen to.",
    },
}
def list_samples(samples_dir=None):
    """Return the sorted audio filenames found directly inside a directory.

    Args:
        samples_dir: Directory to scan. Defaults to the module-level
            ``SAMPLES_DIR`` when omitted, so existing callers are unaffected.

    Returns:
        A sorted list of bare filenames (no directory part) whose extension,
        compared case-insensitively, is one of .wav, .mp3, .ogg or .flac.
        An empty list is returned when the directory does not exist or the
        path is not a directory.
    """
    directory = SAMPLES_DIR if samples_dir is None else samples_dir
    audio_suffixes = ('.wav', '.mp3', '.ogg', '.flac')
    try:
        entries = os.listdir(directory)
    except (FileNotFoundError, NotADirectoryError):
        # A missing sample folder simply means there is nothing to rate.
        return []
    # Keep regular files only: a sub-directory that happens to be named
    # "something.wav" cannot be played back as a sample.
    return sorted(
        name
        for name in entries
        if name.lower().endswith(audio_suffixes)
        and os.path.isfile(os.path.join(directory, name))
    )
def save_response(sample, system_path, annotator, clarity, accent, tone, voice_quality, customization, comfort, comment, session_id=None, user_email=None):
    """Append one rating row to ``OUTPUT_CSV`` and, outside local mode, push it to the Hub.

    Args:
        sample: Filename of the rated sample.
        system_path: Path of the system output that was rated.
        annotator: Annotator identifier as entered in the UI.
        clarity, accent, tone, voice_quality, customization, comfort:
            The six rubric scores.
        comment: Free-text comment.
        session_id: Optional session identifier; stored as "" when falsy.
        user_email: Optional e-mail address; stored as "" when falsy.

    Returns:
        ``{"status": "saved", "sample": sample, "hf": hf_result}`` where
        ``hf_result`` is ``None`` in local mode, otherwise the dict returned by
        ``save_responses_to_hf`` or ``{"status": "hf_error", ...}`` if it raised.

    Raises:
        OSError: If the CSV file (or its parent directory) cannot be
            created or written.
    """
    # Local import: the file itself only imports `datetime` from this module.
    from datetime import timezone

    os.makedirs(os.path.dirname(OUTPUT_CSV) or '.', exist_ok=True)
    header = [
        "timestamp",
        "sample",
        "system_path",
        "annotator",
        "session_id",
        "user_email",
        "clarity",
        "accent",
        "tone",
        "voice_quality",
        "customization",
        "comfort",
        "comment",
    ]
    row = [
        # Same naive-UTC ISO string that the deprecated datetime.utcnow() produced,
        # so new rows stay consistent with rows already in the file.
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        sample,
        system_path,
        annotator,
        session_id or "",
        user_email or "",
        clarity,
        accent,
        tone,
        voice_quality,
        customization,
        comfort,
        comment,
    ]

    with open(OUTPUT_CSV, "a", newline='', encoding='utf-8') as f:
        # Advisory lock is best-effort: if it cannot be taken we still write,
        # but say so instead of failing silently.
        locked = False
        try:
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            locked = True
        except OSError as exc:
            print(f"Warning: could not lock {OUTPUT_CSV} ({exc}); writing without a lock.")
        try:
            writer = csv.writer(f)
            # Decide about the header only once the lock is held, and base it on
            # the real file size: this also covers an existing-but-empty file and
            # avoids two writers both believing they are first.
            if os.fstat(f.fileno()).st_size == 0:
                writer.writerow(header)
            writer.writerow(row)
            # Push the buffered bytes out while the lock is still held; otherwise
            # they would only reach the file on close, after the unlock.
            f.flush()
        finally:
            if locked:
                try:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                except OSError:
                    # Closing the file right after releases the lock anyway.
                    pass

    hf_result = None
    if not IS_LOCAL_MODE:
        # The Hub push is optional; a failure must never lose the local save,
        # so any error is reported in the result instead of being raised.
        try:
            hf_record = dict(zip(header, row))
            hf_result = save_responses_to_hf([hf_record])
        except Exception as e:
            hf_result = {"status": "hf_error", "error": str(e)}
    return {"status": "saved", "sample": sample, "hf": hf_result}
def save_responses_to_hf(rows, repo_id: str | None = None, token: str | None = None):
    """Push a list of response records to a Hugging Face dataset repository.

    Args:
        rows: List of dict records; turned into a dataset via ``Dataset.from_list``.
        repo_id: Target dataset repo. Falls back to the ``HF_DATASET_ID``
            environment variable when not given.
        token: Access token. Falls back to the ``HF_TOKEN`` environment
            variable when not given.

    Returns:
        A status dict. ``status`` is one of ``"hf_unavailable"`` (libraries
        missing or local mode), ``"hf_skipped"`` (no token or repo id),
        ``"hf_push_error"`` (the push raised) or ``"hf_pushed"``. The last two
        also carry ``repo_error``: the message from a failed repo creation,
        or ``None``.

    NOTE(review): every call pushes a dataset built from ``rows`` alone;
    presumably this replaces what is already stored on the Hub rather than
    appending to it - confirm that this is intended.
    """
    if Dataset is None or create_repo is None:
        return {"status": "hf_unavailable", "reason": "missing_packages_or_local_mode"}

    hf_token = token if token else os.environ.get("HF_TOKEN")
    target_repo = repo_id if repo_id else os.environ.get("HF_DATASET_ID")
    if not (hf_token and target_repo):
        return {"status": "hf_skipped", "reason": "missing_token_or_repo_env"}

    # Repo creation is best-effort: a failure is recorded and the push is
    # attempted regardless.
    repo_err = None
    try:
        create_repo(repo_id=target_repo, repo_type="dataset", token=hf_token, private=True, exist_ok=True)
    except Exception as exc:
        repo_err = str(exc)

    dataset = Dataset.from_list(rows)
    try:
        dataset.push_to_hub(target_repo, token=hf_token)
    except Exception as exc:
        return {"status": "hf_push_error", "error": str(exc), "repo_error": repo_err}

    return {"status": "hf_pushed", "rows": len(rows), "repo": target_repo, "repo_error": repo_err}
def make_ui():
    """Build the Gradio Blocks app used to rate TTS samples one at a time.

    The app shows a reference clip and the matching system output, six 1-5
    sliders (one per rubric criterion), a comment box and a "Save & Next"
    button. Each click stores the ratings through ``save_response`` and moves
    on to the next sample; after the last one the rating controls are hidden.

    Returns:
        The assembled ``gr.Blocks`` instance (not launched).
    """
    criteria = [
        "Clarity & Intelligibility",
        "Accent & Pronunciation",
        "Tone & Suitability",
        "Voice quality",
        "Customization & Flexibility",
        "Listening comfort",
    ]

    def system_output_path(sample):
        """Return the path of the system output that pairs with *sample*."""
        return os.path.join("system-outputs", "system_a", sample)

    def make_explainer_fn(crit):
        """Return a callback that renders the rubric text for one criterion."""
        mapping = CRITERIA_EXPLANATIONS.get(crit, {})

        def expl(val):
            # Slider values may not arrive as int; normalise when possible.
            try:
                iv = int(val)
            except (ValueError, TypeError):
                iv = val
            text = mapping.get(iv, "Select a score to see its meaning.")
            return f"**{crit} ({iv}/5):** {text}"

        return expl

    with gr.Blocks() as demo:
        # Per-session state: the sample list and the position within it.
        samples_list = gr.State(list_samples())
        current_index = gr.State(0)

        gr.Markdown("# TTS Rubric — Compact Evaluation")

        with gr.Accordion("Scoring guide & Annotator Info", open=False):
            with gr.Row():
                annotator_global = gr.Textbox(label="Annotator ID", lines=1, scale=1)
                session_id_global = gr.Textbox(label="Session ID", lines=1, scale=1)
                user_email_global = gr.Textbox(label="User email", lines=1, scale=1)
            # Render the whole rubric as markdown, best score first.
            guide_lines = []
            for crit, mapping in CRITERIA_EXPLANATIONS.items():
                guide_lines.append(f"### {crit}")
                for score in sorted(mapping.keys(), reverse=True):
                    guide_lines.append(f"- **{score} points**: {mapping[score]}")
                guide_lines.append("")
            guide_md = "\n".join(guide_lines)
            gr.Markdown(guide_md)

        progress_md = gr.Markdown("Sample 1 of X")

        # Main evaluation layout
        with gr.Row(equal_height=True):
            # Left column: audio players, submit button and status line
            with gr.Column(scale=1):
                sample_name_md = gr.Markdown("### Sample Filename")
                reference_audio = gr.Audio(label="Reference audio")
                system_audio = gr.Audio(label="Evaluation output")
                reference_missing_md = gr.Markdown("(reference audio missing)", visible=False)
                system_missing_md = gr.Markdown("(system output missing)", visible=False)
                submit_btn = gr.Button("Save & Next", variant="primary", scale=1)
                status = gr.Textbox(label="Status", interactive=False, scale=2)
            # Right column: all evaluation controls
            with gr.Column(scale=1):
                # One slider per criterion, stacked vertically
                with gr.Column():
                    clarity = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[0], value=3)
                    accent = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[1], value=3)
                    tone = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[2], value=3)
                    voice_quality = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[3], value=3)
                    customization = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[4], value=3)
                    comfort = gr.Slider(minimum=1, maximum=5, step=1, label=criteria[5], value=3)
                # Single explanation box shared by all sliders
                slider_explanation_md = gr.Markdown("Select a score to see its meaning.")
                comment = gr.Textbox(label="Comments (optional)", lines=2, value="")

        with gr.Row():
            export_btn = gr.Button("Export responses to CSV")
            export_file = gr.File(label="Download responses.csv", interactive=False)

        all_sliders = [clarity, accent, tone, voice_quality, customization, comfort]

        # Components refreshed on every load/save, in the exact order they are
        # registered as event outputs below.
        ui_elements = [
            progress_md, sample_name_md, reference_audio, reference_missing_md,
            system_audio, system_missing_md, clarity, accent, tone, voice_quality,
            customization, comfort, slider_explanation_md, comment, submit_btn, status
        ]

        # --- LOGIC & EVENTS ---
        def load_sample(samples, index):
            """Return a {component: update} dict that shows sample *index*.

            When *index* is past the end of *samples*, the dict instead hides
            the rating controls and shows a completion message.
            """
            total_samples = len(samples)
            if index >= total_samples:
                # End of evaluation session: hide everything except progress/status.
                completion_msg = f"**All {total_samples} samples completed!** You can close this window."
                updates = {
                    component: gr.update(visible=False)
                    for component in ui_elements
                    if component is not progress_md and component is not status
                }
                updates[progress_md] = gr.update(value=completion_msg)
                updates[status] = gr.update(value="Finished.")
                return updates

            sample = samples[index]
            sample_path = os.path.join(SAMPLES_DIR, sample)
            sys_path = system_output_path(sample)
            ref_exists = os.path.exists(sample_path)
            sys_exists = os.path.exists(sys_path)

            updates = {
                progress_md: gr.update(value=f"Sample **{index + 1}** of **{total_samples}**"),
                sample_name_md: gr.update(value=f"### {sample}", visible=True),
                # A missing file hides the player and shows its placeholder instead.
                reference_audio: gr.update(value=sample_path if ref_exists else None, visible=ref_exists),
                reference_missing_md: gr.update(visible=not ref_exists),
                system_audio: gr.update(value=sys_path if sys_exists else None, visible=sys_exists),
                system_missing_md: gr.update(visible=not sys_exists),
                slider_explanation_md: gr.update(value="Select a score to see its meaning."),
                comment: gr.update(value=""),
                submit_btn: gr.update(visible=True),
                status: gr.update(value="Ready."),
            }
            # Every slider goes back to the neutral midpoint for the new sample.
            for slider in all_sliders:
                updates[slider] = gr.update(value=3)
            return updates

        def ordered_outputs(index, updates):
            """Flatten *updates* into the positional list the click event expects."""
            # Order by ui_elements explicitly instead of trusting dict insertion order.
            return [index] + [updates[component] for component in ui_elements]

        def save_and_next(index, samples, annotator, sid, email, cl, ac, to, vq, cu, co, comm):
            """Store the current ratings, then return the updates for the next sample."""
            if index >= len(samples):
                # Nothing left to rate (e.g. a stale click): show the end state, save nothing.
                return ordered_outputs(index, load_sample(samples, index))

            sample = samples[index]
            sys_path = system_output_path(sample)
            save_status = save_response(sample, sys_path, annotator, cl, ac, to, vq, cu, co, comm, session_id=sid, user_email=email)

            next_index = index + 1
            updates = load_sample(samples, next_index)
            # Report the outcome of the save in place of the generic status text.
            updates[status] = gr.update(value=str(save_status['status']))
            return ordered_outputs(next_index, updates)

        # Wire up slider explanations to the single markdown box
        for crit, slider in zip(criteria, all_sliders):
            slider.change(make_explainer_fn(crit), inputs=[slider], outputs=[slider_explanation_md])

        # Initial load
        demo.load(load_sample, inputs=[samples_list, current_index], outputs=ui_elements)

        # Button click event
        submit_btn.click(
            save_and_next,
            inputs=[current_index, samples_list, annotator_global, session_id_global, user_email_global, clarity, accent, tone, voice_quality, customization, comfort, comment],
            outputs=[current_index, *ui_elements],
        )

        # Offer the CSV for download only once it exists.
        export_btn.click(lambda: OUTPUT_CSV if os.path.exists(OUTPUT_CSV) else None, inputs=[], outputs=[export_file])

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on every network
    # interface (0.0.0.0) at port 7860.
    app = make_ui()
    app.launch(server_port=7860, server_name="0.0.0.0")