# Italian-ASR / app.py
# Hugging Face Space by Thedeezat; commit b7c127b ("Italian ASR app bundle only"), 2.08 kB.
"""
Hugging Face Space: speak Italian → **Italian transcription + English translation**.
Whisper (Italian) + Marian IT→EN. Run locally: ``python app.py``
"""
from __future__ import annotations
import os
from pathlib import Path
# Default runtime configuration. setdefault() only fills in a value when the
# variable is absent, so anything already set in the environment wins.
# NOTE(review): these run before the `italian_en_pipeline` import further down,
# presumably because that module reads them at import/construction time —
# confirm before reordering.
os.environ.setdefault("ASR_REALTIME_MODE", "quality")
os.environ.setdefault("ASR_WHISPER_LANGUAGE", "italian")
os.environ.setdefault("ASR_TRANSLATE", "1")
import numpy as np
import gradio as gr
from italian_en_pipeline import ItalianEnglishPipeline
# Directory containing this file; handed to the pipeline as its project root.
_SPACE_ROOT = Path(__file__).resolve().parent
# Process-wide pipeline instance, created on first use by _get_pipeline().
_pipeline: ItalianEnglishPipeline | None = None
def _get_pipeline() -> ItalianEnglishPipeline:
    """Return the shared pipeline, constructing it on the first call.

    The instance is stored in the module-level ``_pipeline`` so later calls
    reuse it instead of building a new one.
    """
    global _pipeline
    if _pipeline is not None:
        return _pipeline
    # First call: build the pipeline rooted at this file's directory.
    _pipeline = ItalianEnglishPipeline(project_root=str(_SPACE_ROOT))
    return _pipeline
def transcribe(audio: tuple[int, np.ndarray] | None) -> tuple[str, str]:
    """Gradio Audio (numpy) → (italian, english).

    Args:
        audio: ``(sample_rate, samples)`` as produced by a
            ``gr.Audio(type="numpy")`` component, or ``None`` when nothing
            was recorded or uploaded.

    Returns:
        The result of the pipeline's ``transcribe_chunk`` call, or
        ``("", "")`` when there is no audio to process.
    """
    if audio is None:
        return "", ""
    sr, data = audio
    if data is None:
        return "", ""
    raw = np.asarray(data)
    if raw.size == 0:
        return "", ""
    x = raw.astype(np.float32)
    if np.issubdtype(raw.dtype, np.signedinteger):
        # Gradio's numpy audio is integer PCM (e.g. int16 in -32768..32767).
        # Scale by the dtype's full-scale value so samples land in [-1, 1].
        # NOTE(review): assumes transcribe_chunk expects normalized floats and
        # does not rescale integer-range input itself — confirm against
        # italian_en_pipeline.
        x /= float(np.iinfo(raw.dtype).max) + 1.0
    if x.ndim > 1:
        # Collapse the last axis (presumably channels) to a single track.
        x = x.mean(axis=-1)
    floats = x.reshape(-1).tolist()
    return _get_pipeline().transcribe_chunk(floats, int(sr))
# UI definition: one audio input, a trigger button, and two text outputs.
with gr.Blocks(title="Italian speech → Italian + English") as demo:
    # Short usage note shown at the top of the page.
    gr.Markdown(
        value=(
            "### Italian → Italian + English\n"
            "Speak or upload **Italian** audio. Output is **recognized Italian** and **English** translation "
            "(Whisper + Marian). Optional fine-tuned Whisper in `models/whisper_finetuned_it/`."
        )
    )

    # type="numpy" makes the handler receive (sample_rate, ndarray).
    audio_in = gr.Audio(
        label="Audio (Italian)",
        type="numpy",
        sources=["microphone", "upload"],
    )
    run_btn = gr.Button(value="Transcribe", variant="primary")

    # Result boxes, filled in the order transcribe() returns its values.
    out_it = gr.Textbox(lines=4, label="Italian (ASR)")
    out_en = gr.Textbox(lines=4, label="English (translation)")

    # Clicking the button runs transcribe() on the current audio value.
    run_btn.click(
        fn=transcribe,
        inputs=[audio_in],
        outputs=[out_it, out_en],
    )
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, falling back to 7860.
    port = int(os.getenv("PORT", "7860"))
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
    )