# Spaces: Runtime error
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torchaudio
import torch

# Hub checkpoint id used for both objects below; kept in one place so the
# processor and the model are always loaded from the same checkpoint.
_CHECKPOINT = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"

# Load the Arabic processor and model once, when the module is imported.
processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
def speech_to_text(audio_path):
    """Transcribe an audio file to text with the module-level Wav2Vec2 model.

    The clip is loaded with torchaudio, down-mixed to a single channel,
    resampled to 16 kHz when its sample rate differs, and then decoded
    greedily (arg-max over the logits of every frame) using the
    module-level ``processor`` and ``model``.

    Args:
        audio_path: Location of the audio file; handed to
            ``torchaudio.load`` unchanged.

    Returns:
        The decoded transcription of the clip (first and only batch item).

    Raises:
        ValueError: If ``audio_path`` is ``None`` or the loaded clip
            contains no samples.
    """
    target_rate = 16000

    if audio_path is None:
        # Message reads: "the audio is not present".
        raise ValueError("الصوت غير موجود")

    # Load the audio file.
    waveform, sample_rate = torchaudio.load(audio_path)

    # Reject a clip with no samples up front instead of handing an empty
    # tensor to the model. Message reads: "the audio file is empty".
    if waveform.numel() == 0:
        raise ValueError("الملف الصوتي فارغ")

    # Multi-channel (e.g. stereo) audio is averaged down to one channel;
    # keepdim retains the leading channel axis.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to 16 kHz when the file uses a different rate.
    if sample_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_rate
        )
        waveform = resampler(waveform)

    # Prepare the model input. Only the channel axis is dropped, so a
    # one-sample clip stays 1-D instead of collapsing to a scalar.
    input_values = processor(
        waveform.squeeze(0).numpy(),
        return_tensors="pt",
        sampling_rate=target_rate,
    ).input_values

    # Run the model without tracking gradients and take the most likely
    # token id for every frame.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # Convert the predicted ids to text.
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]