Spaces:
Sleeping
Sleeping
| import torch | |
| from transformers import pipeline | |
| import numpy as np | |
| import gradio as gr | |
# Use the first CUDA device with half precision when CUDA is available;
# otherwise run on the CPU in full precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "openai/whisper-medium"

print("\n\nReading Languages...\n\n")
# languages.txt is read as one comma-separated list of language names.
# Names are trimmed and lowercased so lookups can be case-insensitive, and
# blank entries (e.g. from a trailing comma or an empty file) are dropped so
# that an empty "Language" field is never treated as a valid language.
with open("languages.txt", "r", encoding="utf-8") as file:
    languages = [
        name.strip().lower()
        for name in file.read().split(",")
        if name.strip()
    ]

print("\n\nInitializing model...\n\n")
# Speech-recognition pipeline shared by every transcribe() call.
transcriber = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
)
print("\n\nModel Ready!!\n\nLaunching Interface...\n\n")
def transcribe(audio, language: str) -> str:
    """Transcribe an audio clip in the given language.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a
            NumPy array, or ``None`` when there is no recording. Samples with
            more than one dimension are averaged over axis 1 to get mono.
        language: Name of the spoken language. Surrounding whitespace and
            letter case are ignored; the name must appear in the module-level
            ``languages`` list.

    Returns:
        The transcribed text, or a message starting with ``"Error!!"`` when
        the audio is missing/empty or the language is not recognised.
    """
    # Nothing to transcribe: report it instead of failing on tuple unpacking.
    if audio is None:
        return "Error!! No audio provided!!"

    # Validate the cheap input first, before touching the audio samples.
    language = language.strip().lower()
    if language not in languages:
        return "Error!! Not a valid language!!"

    sr, y = audio
    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    if y.size == 0:
        return "Error!! No audio provided!!"

    # Peak-normalise, but leave silent clips untouched: dividing by a zero
    # peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    args = {"task": "transcribe", "language": language}
    result = transcriber({"sampling_rate": sr, "raw": y}, generate_kwargs=args)
    return result["text"]
# Input widgets: a microphone recorder and a free-text language field.
audio_input = gr.Audio(sources="microphone")
language_input = gr.Textbox(label="Language", placeholder="Enter the language")

# Wire the widgets to transcribe() and show the result as plain text; the
# page description displays the model id in use.
demo = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, language_input],
    outputs=["text"],
    title="Whisper Model Interface",
    description=model_id,
)

demo.launch()