Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import librosa | |
| from phonemizer import phonemize | |
def extract_phonemes(text):
    """Convert text to phonemes with the espeak backend (US English).

    Args:
        text: Text to phonemize. Forwarded to ``phonemize`` unchanged unless
            it is ``None`` or a blank string.

    Returns:
        The result of ``phonemize(text, language="en-us", backend="espeak",
        strip=True)``, or an empty string when ``text`` is ``None`` or
        contains only whitespace.
    """
    # Nothing to phonemize: skip the backend call entirely. Only None and
    # blank strings are short-circuited so any other input type still reaches
    # phonemize exactly as before.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""
    return phonemize(text, language="en-us", backend="espeak", strip=True)
def analyze_audio_phonetically(
    audio_path,
    reference_text=None,
    wav2vec_processor=None,
    wav2vec_model=None,
):
    """Perform phonetic analysis of an audio file, optionally against reference text.

    The analysis is optional: it only runs when both a processor and a model
    are supplied (i.e. a local model was loaded).

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load`` at
            ``sr=16000``.
        reference_text: Optional text whose phonemes are added to the result.
        wav2vec_processor: Callable processor that also provides
            ``batch_decode``; analysis is skipped when falsy.
        wav2vec_model: Model whose output exposes ``.logits``; analysis is
            skipped when falsy.

    Returns:
        A dict with ``"detected_phonemes"``. When ``reference_text`` is
        truthy it also holds ``"reference_phonemes"`` and a placeholder
        ``"analysis"`` string. Without a processor/model the dict only holds
        a "not available" marker under ``"detected_phonemes"``.
    """
    if not wav2vec_processor or not wav2vec_model:
        return {"detected_phonemes": "[Phoneme analysis not available]"}

    # Imported lazily, after the guard, so the no-model path never needs torch.
    import torch

    audio, _ = librosa.load(audio_path, sr=16000)
    inputs = wav2vec_processor(audio, sampling_rate=16000, return_tensors="pt")

    # Run the forward pass on whatever device the model lives on; feeding
    # CPU tensors to a model on an accelerator raises a device-mismatch error.
    # Models without a ``device`` attribute are called with the tensors as-is.
    input_values = inputs.input_values
    model_device = getattr(wav2vec_model, "device", None)
    if model_device is not None:
        input_values = input_values.to(model_device)

    with torch.no_grad():
        logits = wav2vec_model(input_values).logits

    # Greedy decoding: most likely token per frame, moved back to the CPU
    # before it is handed to the processor for decoding.
    predicted_ids = torch.argmax(logits, dim=-1).cpu()
    phoneme_sequence = wav2vec_processor.batch_decode(predicted_ids)[0]

    result = {"detected_phonemes": phoneme_sequence}
    if reference_text:
        result["reference_phonemes"] = extract_phonemes(reference_text)
        # Placeholder: no actual comparison is implemented yet.
        result["analysis"] = "Phoneme comparison would be performed here"
    return result
def extract_pronunciation_embedding(audio_path, hubert_processor=None, hubert_model=None):
    """Extract a pronunciation embedding for comparison purposes.

    The extraction is optional: it only runs when both a processor and a
    model are supplied (i.e. a local model was loaded).

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load`` at
            ``sr=16000``.
        hubert_processor: Callable processor returning a mapping of tensors;
            extraction is skipped when falsy.
        hubert_model: Model whose output exposes ``.last_hidden_state``;
            extraction is skipped when falsy.

    Returns:
        A NumPy array holding ``last_hidden_state`` averaged over ``dim=1``
        (presumably the time axis — confirm against the model), or ``None``
        when the processor or model is missing.
    """
    if not hubert_model or not hubert_processor:
        return None

    # Imported lazily, after the guard, so the no-model path never needs torch.
    import torch

    audio, _ = librosa.load(audio_path, sr=16000)
    inputs = hubert_processor(audio, sampling_rate=16000, return_tensors="pt")

    # The result is moved to the CPU below, so the model may live on an
    # accelerator; its inputs must be on that same device or the forward
    # pass fails. Models without a ``device`` attribute get the inputs as-is.
    model_device = getattr(hubert_model, "device", None)
    if model_device is not None:
        inputs = {name: tensor.to(model_device) for name, tensor in inputs.items()}

    with torch.no_grad():
        outputs = hubert_model(**inputs)

    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()