Spaces:
Sleeping
Sleeping
| import spaces | |
| import time | |
| import os | |
| import torch | |
| import gradio as gr | |
| from transformers.pipelines import pipeline | |
| import utils | |
| from config import ( | |
| MODEL_PATHS, | |
| SUPPORTED_LANGUAGES, | |
| CUSTOM_CSS, | |
| ) | |
# Language used by this demo. It must be contained in SUPPORTED_LANGUAGES
# (the original note lists en, de and lb as available).
LANGUAGE = "lb"

if LANGUAGE not in SUPPORTED_LANGUAGES:
    # Abort start-up with a non-zero exit status. Raising SystemExit directly
    # does not depend on the interactive `exit()` helper that the `site`
    # module injects, and the interpreter writes the message to stderr.
    raise SystemExit(
        f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}"
    )

# Model location for the selected language, looked up in the project config.
MODEL_PATH = MODEL_PATHS[LANGUAGE]

# Lazily created ASR pipeline; it is built on the first transcription request.
_asr_pipeline = None
| def transcribe_gradio(audio_path: str | None) -> str: | |
| if not audio_path: | |
| return "⚠️ Please record something or choose a file first." | |
| global _asr_pipeline | |
| if _asr_pipeline is None: | |
| _asr_pipeline = pipeline( | |
| "automatic-speech-recognition", | |
| model=MODEL_PATH, | |
| device=0 if torch.cuda.is_available() else -1, | |
| chunk_length_s=30, | |
| stride_length_s=(4, 2), | |
| batch_size=8, | |
| token=os.getenv("HF_TOKEN"), | |
| ) | |
| start = time.time() | |
| try: | |
| result = _asr_pipeline(audio_path) | |
| transcript = result["text"] if isinstance(result, dict) else str(result) | |
| except Exception as err: | |
| return f"❌ {err}" | |
| runtime = time.time() - start | |
| return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s" | |
# Gradio interface: header, input/output row, action buttons and wiring.
with gr.Blocks(css=CUSTOM_CSS, theme="soft", title="Wave2Vec (Luxembourgish) ") as demo:
    # Header with short usage instructions. The text is kept flush-left so
    # Markdown can never interpret it as an indented code block.
    gr.Markdown(
        value="""
# 🎙️ Speech-to-Text Demo — Wave2Vec (Luxembourgish)
Use **Record** to capture speech live or **Upload** to select an audio file (.wav, .mp3, .flac).
Hit **Transcribe** to convert your recording into text, and **Clear** to reset both fields.
"""
    )

    # Audio source and transcript output share one row.
    with gr.Row():
        audio_input = gr.Audio(
            label="Input audio",
            type="filepath",
            sources=["microphone", "upload"],
            autoplay=False,
        )
        output_text = gr.Textbox(
            label="Transcript",
            lines=10,
            placeholder="Your transcript will appear here …",
            show_copy_button=True,
        )

    # Action buttons in their own row.
    with gr.Row(elem_classes="centered-row", equal_height=True) as row:
        transcribe_btn = gr.Button(value="Transcribe ✨", scale=0)
        clear_btn = gr.ClearButton(
            components=[audio_input, output_text],
            elem_classes="clear-btn",
            scale=0,
        )

    # Clicking "Transcribe" runs the ASR function on the selected audio file
    # and writes the returned text into the transcript box.
    transcribe_btn.click(
        fn=transcribe_gradio,
        inputs=audio_input,
        outputs=output_text,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()