""" Hugging Face Space: speak Italian → **Italian transcription + English translation**. Whisper (Italian) + Marian IT→EN. Run locally: ``python app.py`` """ from __future__ import annotations import os from pathlib import Path os.environ.setdefault("ASR_REALTIME_MODE", "quality") os.environ.setdefault("ASR_WHISPER_LANGUAGE", "italian") os.environ.setdefault("ASR_TRANSLATE", "1") import numpy as np import gradio as gr from italian_en_pipeline import ItalianEnglishPipeline _SPACE_ROOT = Path(__file__).resolve().parent _pipeline: ItalianEnglishPipeline | None = None def _get_pipeline() -> ItalianEnglishPipeline: global _pipeline if _pipeline is None: _pipeline = ItalianEnglishPipeline(project_root=str(_SPACE_ROOT)) return _pipeline def transcribe(audio: tuple[int, np.ndarray] | None) -> tuple[str, str]: """Gradio Audio (numpy) → (italian, english).""" if audio is None: return "", "" sr, data = audio if data is None or len(data) == 0: return "", "" x = np.asarray(data, dtype=np.float32) if x.ndim > 1: x = x.mean(axis=-1) floats = x.reshape(-1).tolist() return _get_pipeline().transcribe_chunk(floats, int(sr)) with gr.Blocks(title="Italian speech → Italian + English") as demo: gr.Markdown( "### Italian → Italian + English\n" "Speak or upload **Italian** audio. Output is **recognized Italian** and **English** translation " "(Whisper + Marian). Optional fine-tuned Whisper in `models/whisper_finetuned_it/`." ) audio_in = gr.Audio( sources=["microphone", "upload"], type="numpy", label="Audio (Italian)", ) run_btn = gr.Button("Transcribe", variant="primary") out_it = gr.Textbox(label="Italian (ASR)", lines=4) out_en = gr.Textbox(label="English (translation)", lines=4) run_btn.click(fn=transcribe, inputs=[audio_in], outputs=[out_it, out_en]) if __name__ == "__main__": port = int(os.environ.get("PORT", "7860")) demo.launch(server_name="0.0.0.0", server_port=port)