Spaces:
Runtime error
Runtime error
| import librosa | |
| import numpy as np | |
| from pydub import AudioSegment | |
| # Feature extraction for Intonation | |
def _clamp_score(value, lower=0.0, upper=10.0):
    """Return *value* as a float limited to [lower, upper]; NaN maps to *lower*."""
    value = float(value)
    if np.isnan(value):
        # The built-in min()/max() let NaN fall through unchanged, so it has to
        # be handled explicitly to keep the "score is within range" promise.
        return float(lower)
    return float(min(max(value, lower), upper))


def evaluate_intonation(wav_file, *, reference_tempo=120.0):
    """Score the intonation of a recording on a 0-10 scale.

    Three sub-scores are computed from the audio and averaged:

    * sentence stress: mean RMS energy of the signal, multiplied by 10;
    * intonation patterns: coefficient of variation (std / mean) of the
      pitch candidates whose magnitude exceeds the median magnitude,
      multiplied by 10;
    * rhythm: estimated tempo relative to ``reference_tempo``, multiplied
      by 10.

    Each sub-score is clamped to [0, 10] before averaging.

    Args:
        wav_file: Path or file-like object accepted by ``librosa.load``.
        reference_tempo: Tempo in BPM that maps to a rhythm score of 10.
            Defaults to 120, the value the scoring was originally tuned for.

    Returns:
        dict with float values under the keys ``"sentence_stress"``,
        ``"intonation_patterns"``, ``"rhythm"`` and ``"intonation_score"``.

    Raises:
        ValueError: If ``reference_tempo`` is not strictly positive.
    """
    if not reference_tempo > 0:
        raise ValueError("reference_tempo must be a positive number of BPM")

    # librosa.load is called with its defaults, as in the original scoring.
    y, sr = librosa.load(wav_file)

    if y.size == 0:
        # Nothing to measure in an empty clip; report the lowest scores rather
        # than pushing an empty array through the feature extractors.
        return {
            "sentence_stress": 0.0,
            "intonation_patterns": 0.0,
            "rhythm": 0.0,
            "intonation_score": 0.0,
        }

    # 1. Sentence stress: frame-wise RMS energy, averaged over the clip and
    #    scaled by 10 so it lands on the common score scale.
    rms_energy = librosa.feature.rms(y=y)[0]
    avg_energy = float(np.mean(rms_energy)) * 10

    # 2. Intonation patterns: relative spread of the stronger pitch candidates.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    strong_pitches = pitches[magnitudes > np.median(magnitudes)]
    mean_pitch = float(np.mean(strong_pitches)) if strong_pitches.size else 0.0
    if mean_pitch > 0:
        pitch_variation = float(np.std(strong_pitches)) / mean_pitch * 10
    else:
        # No usable pitch candidates (or a zero mean): avoid dividing by zero.
        pitch_variation = 0.0

    # 3. Rhythm: estimated tempo relative to the reference tempo.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version the tempo may come back as a plain
    # float or as a one-element array; calling float() directly on an array
    # with ndim > 0 is deprecated by NumPy, so pull the element out first.
    tempo_bpm = float(np.ravel(tempo)[0])
    rhythm_raw = tempo_bpm / reference_tempo * 10

    # Bring every sub-score into [0, 10] before averaging.
    sentence_stress_score = _clamp_score(avg_energy)
    intonation_patterns_score = _clamp_score(pitch_variation)
    rhythm_score = _clamp_score(rhythm_raw)

    # Intonation score: (sentence stress + intonation patterns + rhythm) / 3
    intonation_score = (
        sentence_stress_score + intonation_patterns_score + rhythm_score
    ) / 3

    return {
        "sentence_stress": sentence_stress_score,
        "intonation_patterns": intonation_patterns_score,
        "rhythm": rhythm_score,
        "intonation_score": intonation_score,
    }