|
|
""" |
|
|
Annotation Helper Utilities |
|
|
|
|
|
Helper functions for phoneme-level annotation tasks. |
|
|
""" |
|
|
|
|
|
import json |
|
|
import logging |
|
|
from pathlib import Path |
|
|
from typing import List, Dict, Any, Optional |
|
|
import numpy as np |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
def load_annotations(annotations_file: Path = Path("data/annotations.json")) -> List[Dict[str, Any]]:
    """Load annotations from a JSON file.

    Args:
        annotations_file: Path to the JSON annotations file.

    Returns:
        The list of annotation dicts, or ``[]`` when the file is missing
        or cannot be read/parsed.
    """
    if not annotations_file.exists():
        logger.warning("Annotations file not found: %s", annotations_file)
        return []

    try:
        with open(annotations_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from a bare `except Exception`: only I/O and parse
        # failures are expected here; anything else should surface.
        logger.error("Failed to load annotations: %s", e)
        return []
|
|
|
|
|
|
|
|
def save_annotations(annotations: List[Dict[str, Any]], annotations_file: Path = Path("data/annotations.json")):
    """Write *annotations* to disk as pretty-printed JSON.

    The parent directory is created on demand.
    """
    annotations_file.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps(annotations, indent=2, ensure_ascii=False)
    annotations_file.write_text(payload, encoding='utf-8')

    logger.info(f"Saved {len(annotations)} annotations to {annotations_file}")
|
|
|
|
|
|
|
|
def get_annotation_statistics(annotations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise a list of annotations.

    Returns a dict with the sample count, the summed ``total_errors``,
    per-error-type counts, per-phoneme error counts, and the mean number
    of errors per sample (0.0 for an empty input).
    """
    sample_count = len(annotations)
    error_count = sum(a.get('total_errors', 0) for a in annotations)

    # Known categories start at zero so they always appear in the output;
    # any unexpected type found in the data is added on the fly.
    by_type = dict.fromkeys(
        ('substitution', 'omission', 'distortion', 'stutter', 'normal'), 0)
    by_phoneme: Dict[str, int] = {}

    for ann in annotations:
        for err in ann.get('phoneme_errors', []):
            kind = err.get('error_type', 'normal')
            by_type[kind] = by_type.get(kind, 0) + 1

            ph = err.get('phoneme', 'unknown')
            by_phoneme[ph] = by_phoneme.get(ph, 0) + 1

    return {
        'total_samples': sample_count,
        'total_errors': error_count,
        'error_types': by_type,
        'phoneme_errors': by_phoneme,
        'avg_errors_per_sample': error_count / sample_count if sample_count else 0.0,
    }
|
|
|
|
|
|
|
|
def export_for_training(
    annotations: List[Dict[str, Any]],
    output_file: Path = Path("data/training_dataset.json")
) -> Dict[str, Any]:
    """Export annotations as frame-labelled samples ready for training.

    Each annotation becomes one sample with a per-frame class label
    (20 ms frames). Single error types map to classes 0-4; an error that
    co-occurs with a stutter on the same frame is shifted by +4 (5-7).

    Args:
        annotations: Annotation dicts as produced by the annotation tool.
        output_file: Destination JSON file; parent dirs created on demand.

    Returns:
        Dict with the number of exported samples and the output path.
    """
    # Single-error class ids; combined error+stutter classes are 5-7.
    class_map = {
        'normal': 0,
        'substitution': 1,
        'omission': 2,
        'distortion': 3,
        'stutter': 4,
    }

    training_data = []

    for ann in annotations:
        audio_file = ann.get('audio_file')
        expected_text = ann.get('expected_text', '')
        duration = ann.get('duration', 0.0)

        # 20 ms frames; a duration of 0.0 yields an empty label vector.
        num_frames = int((duration * 1000) / 20)
        frame_labels = [0] * num_frames

        errors = ann.get('phoneme_errors', [])

        # Hoisted out of the per-error loop: the original rescanned the
        # whole error list for every error, making labelling O(n^2).
        stutter_frames = {
            e.get('frame_id') for e in errors
            if e.get('error_type') == 'stutter'
        }

        for err in errors:
            frame_id = err.get('frame_id', 0)
            err_type = err.get('error_type', 'normal')
            class_id = class_map.get(err_type, 0)

            # Substitution/omission/distortion on a stuttered frame maps
            # to the combined classes 5/6/7.
            if err_type not in ('normal', 'stutter') and frame_id in stutter_frames:
                class_id += 4

            # NOTE(review): a later entry for the same frame overwrites an
            # earlier one (e.g. a standalone 'stutter' entry can clobber a
            # combined 5-7 label). Preserved from the original — confirm
            # whether last-write-wins is intended.
            if 0 <= frame_id < num_frames:
                frame_labels[frame_id] = class_id

        training_data.append({
            'audio_file': audio_file,
            'expected_text': expected_text,
            'duration': duration,
            'num_frames': num_frames,
            'frame_labels': frame_labels,
            'phoneme_errors': errors,
        })

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(training_data, f, indent=2, ensure_ascii=False)

    logger.info("Exported %d samples for training to %s", len(training_data), output_file)

    return {
        'samples': len(training_data),
        'output_file': str(output_file)
    }
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Quick CLI summary: load everything, print the headline stats, and
    # produce the training export when any annotations exist.
    loaded = load_annotations()
    summary = get_annotation_statistics(loaded)

    print(f"Total samples: {summary['total_samples']}")
    print(f"Total errors: {summary['total_errors']}")
    print(f"Error types: {summary['error_types']}")

    if loaded:
        export_for_training(loaded)
|
|
|
|
|
|