| """ |
| Compute Voice Clarity Score from audio file |
| """ |
|
|
| import librosa |
| import numpy as np |
| from typing import Dict, Any |
| from .vcs import calculate_voice_clarity_score, get_clarity_insight |
|
|
def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Transcribe a speech recording and derive its Voice Clarity Score.

    Args:
        file_path (str): Location of the audio file to analyse.
        whisper_model: Transcription model. It must expose
            ``transcribe(file_path, word_timestamps=False, fp16=False)`` and
            return a mapping that provides ``"text"`` and ``"segments"``.

    Returns:
        dict: Whatever ``calculate_voice_clarity_score`` produced, with
        ``word_count`` and ``duration`` added under its ``"components"`` entry.

    Raises:
        ValueError: If the transcription yields no text or no segments, or
            if the loaded audio has a non-positive duration.
    """
    transcription = whisper_model.transcribe(
        file_path,
        word_timestamps=False,
        fp16=False,
    )
    text = transcription.get("text", "").strip()
    speech_segments = transcription.get("segments", [])

    # Both the text and the segment list are required by the later steps.
    if not (text and speech_segments):
        raise ValueError("Empty transcript or segments from Whisper.")

    # sr=None asks librosa for the file's own sampling rate (no resampling).
    samples, sample_rate = librosa.load(file_path, sr=None)
    if sample_rate:
        length = len(samples) / sample_rate
    else:
        # A missing/zero sampling rate is treated as an invalid recording.
        length = 0.0
    if length <= 0:
        raise ValueError("Audio duration invalid or zero.")

    scored = calculate_voice_clarity_score(samples, sample_rate, speech_segments)

    # Attach simple bookkeeping figures next to the computed components.
    n_words = len(text.split())
    scored["components"]["word_count"] = n_words
    scored["components"]["duration"] = length

    return scored
|
|
def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Run the voice clarity computation and report its headline score.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model, forwarded unchanged to
            ``compute_voice_clarity_score``.

    Returns:
        Dict[str, Any]: A mapping with a single ``"VCS"`` entry copied from
        the clarity result. The component scores are not included.
    """
    # Errors raised by the clarity computation propagate to the caller.
    return {"VCS": compute_voice_clarity_score(file_path, whisper_model)["VCS"]}
|
|
| |
# Names exported by ``from <module> import *``.
__all__ = [
    "compute_voice_clarity_score",
    "analyze_voice_quality",
]