File size: 4,833 Bytes
1cd6149 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
"""
Annotation Helper Utilities
Helper functions for phoneme-level annotation tasks.
"""
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
import numpy as np
logger = logging.getLogger(__name__)
def load_annotations(
    annotations_file: Path = Path("data/annotations.json"),
) -> List[Dict[str, Any]]:
    """Load the annotation list from a JSON file.

    Loading is best-effort: every failure is logged and reported to the
    caller as an empty list rather than as an exception.

    Args:
        annotations_file: Path of the JSON file to read.

    Returns:
        The parsed list of annotations, or ``[]`` when the file is missing,
        cannot be read or decoded, is not valid JSON, or holds a top-level
        value that is not a list.
    """
    # Open directly instead of checking exists() first: the file could
    # disappear between the check and the open.
    try:
        with open(annotations_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Annotations file not found: {annotations_file}")
        return []
    except (OSError, ValueError, RecursionError) as e:
        # OSError: unreadable path; ValueError: malformed JSON or bad
        # UTF-8; RecursionError: pathologically nested JSON.
        logger.error(f"Failed to load annotations: {e}")
        return []

    # Callers iterate the result as a list of dicts, so reject any other
    # top-level JSON value here instead of failing later.
    if not isinstance(data, list):
        logger.error(
            "Failed to load annotations: expected a JSON list, "
            f"got {type(data).__name__}"
        )
        return []
    return data
def save_annotations(
    annotations: List[Dict[str, Any]],
    annotations_file: Path = Path("data/annotations.json"),
) -> None:
    """Write annotations to a JSON file, creating parent directories.

    Args:
        annotations: Annotation records to store; must be JSON-serializable.
        annotations_file: Destination path. Any existing file is replaced.

    Raises:
        TypeError: If ``annotations`` contains a value JSON cannot encode.
            The destination file is left untouched in that case.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize before opening: mode 'w' truncates the file immediately, so
    # an encoding failure afterwards would wipe the existing annotations.
    payload = json.dumps(annotations, indent=2, ensure_ascii=False)

    annotations_file.parent.mkdir(parents=True, exist_ok=True)
    with open(annotations_file, 'w', encoding='utf-8') as f:
        f.write(payload)
    logger.info(f"Saved {len(annotations)} annotations to {annotations_file}")
def get_annotation_statistics(annotations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise error counts over a collection of annotations.

    Args:
        annotations: Annotation dicts. Each may carry a ``total_errors``
            number and a ``phoneme_errors`` list of dicts with optional
            ``error_type`` and ``phoneme`` keys.

    Returns:
        A dict with ``total_samples``, ``total_errors`` (sum of the
        per-annotation ``total_errors`` fields), ``error_types`` (count per
        error type), ``phoneme_errors`` (count per phoneme) and
        ``avg_errors_per_sample`` (0.0 when there are no annotations).
    """
    sample_count = len(annotations)
    error_total = sum(entry.get('total_errors', 0) for entry in annotations)

    # Seed the known categories so they are reported even at zero.
    type_counts = dict.fromkeys(
        ('substitution', 'omission', 'distortion', 'stutter', 'normal'), 0
    )
    phoneme_counts = {}

    for entry in annotations:
        for error in entry.get('phoneme_errors', []):
            # Entries without a type are tallied as 'normal'; types outside
            # the seeded set get their own counter.
            kind = error.get('error_type', 'normal')
            type_counts[kind] = type_counts.get(kind, 0) + 1

            symbol = error.get('phoneme', 'unknown')
            phoneme_counts[symbol] = phoneme_counts.get(symbol, 0) + 1

    if sample_count > 0:
        average = error_total / sample_count
    else:
        average = 0.0

    return {
        'total_samples': sample_count,
        'total_errors': error_total,
        'error_types': type_counts,
        'phoneme_errors': phoneme_counts,
        'avg_errors_per_sample': average,
    }
# Frame label for a single error type (classes 0-4 of the 8-class system).
_ERROR_CLASS_IDS = {
    'normal': 0,
    'substitution': 1,
    'omission': 2,
    'distortion': 3,
    'stutter': 4,
}
# Added to a non-stutter error's class when its frame also stutters (5-7).
_STUTTER_OFFSET = 4
# Length of one label frame in milliseconds.
_FRAME_MS = 20


def _build_frame_labels(phoneme_errors: List[Dict[str, Any]], num_frames: int) -> List[int]:
    """Return one class id per frame for the given phoneme errors."""
    labels = [0] * num_frames  # 0 = normal

    # Keep only errors that land on a real frame; a missing frame_id means
    # frame 0, while non-integer or out-of-range ids are skipped.
    placed = []
    for err in phoneme_errors:
        frame_id = err.get('frame_id', 0)
        if isinstance(frame_id, int) and 0 <= frame_id < num_frames:
            placed.append((frame_id, err.get('error_type', 'normal')))

    # Collected once up front rather than rescanning all errors per error.
    stutter_frames = {fid for fid, kind in placed if kind == 'stutter'}
    stutter_class = _ERROR_CLASS_IDS['stutter']

    for frame_id, err_type in placed:
        class_id = _ERROR_CLASS_IDS.get(err_type, 0)
        if err_type == 'stutter':
            # A label above the stutter class is a combined class already
            # written for this frame. Keep it, so the result does not depend
            # on whether the stutter entry comes before or after.
            if labels[frame_id] > stutter_class:
                continue
        elif err_type != 'normal' and frame_id in stutter_frames:
            class_id += _STUTTER_OFFSET
        labels[frame_id] = class_id
    return labels


def export_for_training(
    annotations: List[Dict[str, Any]],
    output_file: Path = Path("data/training_dataset.json")
) -> Dict[str, Any]:
    """Export annotations as frame-labelled training samples.

    Each annotation becomes one sample whose ``frame_labels`` list has one
    class id per 20 ms frame: 0 normal, 1 substitution, 2 omission,
    3 distortion, 4 stutter, and 5-7 for substitution/omission/distortion on
    a frame that also has a stutter. If several non-stutter errors share a
    frame, the last one listed wins.

    Args:
        annotations: Annotation dicts with optional ``audio_file``,
            ``expected_text``, ``duration`` and ``phoneme_errors`` keys.
            ``duration`` is multiplied by 1000 to get milliseconds. A
            missing or ``None`` duration counts as 0.0, and a missing or
            ``None`` error list as empty.
        output_file: JSON file to write; parent directories are created.

    Returns:
        A dict with ``samples`` (number of exported samples) and
        ``output_file`` (the destination path as a string).
    """
    training_data = []

    for ann in annotations:
        duration = ann.get('duration', 0.0)
        if duration is None:
            duration = 0.0
        phoneme_errors = ann.get('phoneme_errors') or []

        # Clamp so a negative duration cannot report a negative frame count
        # next to an empty label list.
        num_frames = max(0, int((duration * 1000) / _FRAME_MS))

        training_data.append({
            'audio_file': ann.get('audio_file'),
            'expected_text': ann.get('expected_text', ''),
            'duration': duration,
            'num_frames': num_frames,
            'frame_labels': _build_frame_labels(phoneme_errors, num_frames),
            'phoneme_errors': phoneme_errors
        })

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(training_data, f, indent=2, ensure_ascii=False)

    logger.info(f"Exported {len(training_data)} samples for training to {output_file}")

    return {
        'samples': len(training_data),
        'output_file': str(output_file)
    }
if __name__ == "__main__":
    # Demo run: summarise the default annotations file, then export it.
    records = load_annotations()
    stats = get_annotation_statistics(records)

    print(f"Total samples: {stats['total_samples']}")
    print(f"Total errors: {stats['total_errors']}")
    print(f"Error types: {stats['error_types']}")

    # Only write a training dataset when there is something to export.
    if records:
        export_for_training(records)
|