# Spaces: Running
| import gradio as gr | |
| from transformers import Wav2Vec2Processor, AutoModelForCTC | |
| from transformers import Wav2Vec2ProcessorWithLM | |
| import torch | |
| from pyctcdecode import build_ctcdecoder | |
| import librosa | |
| import logging | |
# Raise the asyncio logger to CRITICAL. This hides the "Exception ignored in"
# messages, which are usually harmless during shutdown.
logging.getLogger("asyncio").setLevel(logging.CRITICAL)

# Checkpoint identifier passed to `from_pretrained`; replace with your own model.
MODEL_ID = "eleferrand/w2v-Morisyen"

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Language-model file handed to pyctcdecode (relative path).
lm_model = "Morisyen.arpa"
path_checkpoint = MODEL_ID

model = AutoModelForCTC.from_pretrained(path_checkpoint).to(device)
processor = Wav2Vec2Processor.from_pretrained(path_checkpoint)

# Build the decoder alphabet from the tokenizer vocabulary.
vocab = processor.tokenizer.get_vocab()

# Rename the "|" entry to a literal space, keeping its id. The previous code
# assigned vocab[' '] and then deleted vocab[' '] again, so the rename never
# took effect. The guard avoids a KeyError when "|" is absent and avoids
# clobbering an existing " " entry.
if "|" in vocab and " " not in vocab:
    vocab[" "] = vocab.pop("|")

# Labels must be ordered by token id so that position i in the list matches
# column i of the model's logits; keys are lower-cased as before.
sorted_dict = {
    token.lower(): token_id
    for token, token_id in sorted(vocab.items(), key=lambda item: item[1])
}
print(sorted_dict)

# alpha / beta are pyctcdecode's language-model weight and length-score
# adjustment, respectively (see the pyctcdecode documentation).
decoder = build_ctcdecoder(
    labels=list(sorted_dict),
    kenlm_model_path=lm_model,
    alpha=0.5,
    beta=1.5,
)

# Same feature extractor and tokenizer as `processor`, plus the LM-backed
# beam-search decoder used for decoding logits into text.
processor_with_lm = Wav2Vec2ProcessorWithLM(
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    decoder=decoder,
)
# (keyword, reply) pairs checked in order against the transcript. "dwat" must
# come before "dwa" because "dwa" is a substring of "dwat" and would otherwise
# shadow it.
_DIRECTION_KEYWORDS = (
    ("dwat", " ale a dwat..."),
    ("gos", "ale a gos..."),
    ("dwa", "ale tout dwa"),
    ("se bon", "end"),
)


def _direction_for(transcript):
    """Return the reply for the first keyword found in *transcript*, else *transcript* itself."""
    for keyword, reply in _DIRECTION_KEYWORDS:
        if keyword in transcript:
            return reply
    return transcript


def transcribe(audio_path, request: gr.Request):
    """Transcribe an audio file and return a pair of strings for the two outputs.

    Args:
        audio_path: Path of the audio file to transcribe, or ``None`` when no
            audio was provided.
        request: The Gradio request; only its ``sec-ch-ua`` header is read.

    Returns:
        A 2-tuple of strings:

        * ``("No audio recorded", "")`` when ``audio_path`` is ``None``;
        * ``(transcript, "something")`` when the request carries no
          ``sec-ch-ua`` header (treated as an API call);
        * ``(direction, transcript)`` otherwise, where ``direction`` is the
          reply mapped from the first matching keyword, or the transcript
          itself when none matches.
    """
    if audio_path is None:
        return "No audio recorded", ""

    # Requests without a `sec-ch-ua` header are treated as API calls.
    # NOTE(review): presumably a browser-vs-client heuristic; browsers that do
    # not send this header would also be classified as API callers - confirm.
    is_api = request.headers.get("sec-ch-ua") is None

    # Load the audio resampled to 16 kHz.
    waveform, sample_rate = librosa.load(audio_path, sr=16000)

    # One pass through the feature extractor. The previous code ran the
    # waveform through `processor` and then again through `processor_with_lm`,
    # which is built from the same feature extractor.
    inputs = processor_with_lm(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        logits = model(inputs.input_values.to(device)).logits

    # Decode the single item in the batch with the LM-backed decoder.
    transc = processor_with_lm.decode(logits[0].cpu().numpy()).text

    if is_api:
        return transc, "something"
    return _direction_for(transc), transc
# Gradio interface: one audio input, handed to `transcribe` as a file path,
# and two plain-text outputs for the pair of strings `transcribe` returns.
audio_input = gr.Audio(type="filepath")
text_outputs = [gr.Text(), gr.Text()]

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=text_outputs,
    api_name="transcribe",
)

# Start the app at import time; show_error=True enables Gradio's error display.
demo.launch(show_error=True)