"""
Annotation Helper Utilities

Helper functions for phoneme-level annotation tasks.
"""

import json
import logging
from collections import Counter
from pathlib import Path
from typing import List, Dict, Any, Optional, Union

import numpy as np

logger = logging.getLogger(__name__)

# Length of one label frame in milliseconds.
_FRAME_MS = 20

# Frame label scheme (8 classes):
#   0 = normal
#   1-3 = articulation error only (substitution / omission / distortion)
#   4 = stutter only
#   5-7 = articulation error (1-3) combined with a stutter on the same frame
_ARTICULATION_CLASS_IDS = {
    'normal': 0,
    'substitution': 1,
    'omission': 2,
    'distortion': 3,
}
_STUTTER_CLASS_ID = 4


def load_annotations(
    annotations_file: Union[str, Path] = Path("data/annotations.json")
) -> List[Dict[str, Any]]:
    """Load annotations from a JSON file.

    This is a best-effort loader: every failure is logged and reported as an
    empty list rather than raised.

    Args:
        annotations_file: Path (``str`` or ``Path``) of the JSON file.

    Returns:
        The decoded list of annotation dicts, or ``[]`` when the file does not
        exist, cannot be read or decoded, or does not contain a top-level
        JSON list.
    """
    annotations_file = Path(annotations_file)
    if not annotations_file.exists():
        logger.warning("Annotations file not found: %s", annotations_file)
        return []

    try:
        with open(annotations_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.error("Failed to load annotations: %s", e)
        return []

    if not isinstance(data, list):
        # Callers iterate the result and call .get() on each item; any other
        # top-level JSON type would only fail later and less clearly.
        logger.error(
            "Failed to load annotations: expected a JSON list in %s, got %s",
            annotations_file,
            type(data).__name__,
        )
        return []
    return data


def save_annotations(
    annotations: List[Dict[str, Any]],
    annotations_file: Union[str, Path] = Path("data/annotations.json"),
):
    """Save annotations to a JSON file, creating parent directories as needed.

    Args:
        annotations: Annotation dicts to write.
        annotations_file: Destination path (``str`` or ``Path``).

    Raises:
        TypeError, ValueError: If ``annotations`` is not JSON-serializable.
            The destination file is left untouched in that case.
        OSError: If the directory or file cannot be written.
    """
    annotations_file = Path(annotations_file)

    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error afterwards would wipe out the existing annotations.
    payload = json.dumps(annotations, indent=2, ensure_ascii=False)

    annotations_file.parent.mkdir(parents=True, exist_ok=True)
    with open(annotations_file, 'w', encoding='utf-8') as f:
        f.write(payload)

    logger.info("Saved %d annotations to %s", len(annotations), annotations_file)


def get_annotation_statistics(annotations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Calculate summary statistics from annotations.

    Args:
        annotations: Annotation dicts. Each may carry ``total_errors`` (a
            number) and ``phoneme_errors`` (a list of dicts with
            ``error_type`` and ``phoneme``). Missing or ``None`` values count
            as zero / empty.

    Returns:
        A dict with:
            ``total_samples``: number of annotations.
            ``total_errors``: sum of the ``total_errors`` fields.
            ``error_types``: count per error type. The five known types are
                always present; other types are added when encountered.
            ``phoneme_errors``: count of error entries per phoneme
                (``'unknown'`` when an entry has no ``phoneme``).
            ``avg_errors_per_sample``: ``total_errors / total_samples``, or
                ``0.0`` when there are no samples.
    """
    total_samples = len(annotations)
    total_errors = sum(a.get('total_errors') or 0 for a in annotations)

    # Pre-seed the known types so they are reported even with a zero count.
    error_types = Counter({
        'substitution': 0,
        'omission': 0,
        'distortion': 0,
        'stutter': 0,
        'normal': 0,
    })
    phoneme_errors = Counter()

    for ann in annotations:
        for err in ann.get('phoneme_errors') or []:
            error_types[err.get('error_type', 'normal')] += 1
            phoneme_errors[err.get('phoneme', 'unknown')] += 1

    return {
        'total_samples': total_samples,
        'total_errors': total_errors,
        'error_types': dict(error_types),
        'phoneme_errors': dict(phoneme_errors),
        'avg_errors_per_sample': total_errors / total_samples if total_samples > 0 else 0.0,
    }


def _build_frame_labels(phoneme_errors: List[Dict[str, Any]], num_frames: int) -> List[int]:
    """Return one class id per frame for a sample's phoneme errors.

    Stutter entries and non-stutter entries are collected separately and then
    merged, so the label of a frame carrying both does not depend on the
    order in which the errors are listed.
    """
    base_class: Dict[int, int] = {}  # frame_id -> class of the last non-stutter entry
    stuttered = set()                # frame_ids with at least one stutter entry

    for err in phoneme_errors:
        frame_id = err.get('frame_id', 0)
        if not isinstance(frame_id, int):
            logger.warning(
                "Skipping phoneme error with non-integer frame_id: %r", frame_id
            )
            continue
        if not 0 <= frame_id < num_frames:
            # Errors outside the audio's frame range cannot be labelled.
            continue

        err_type = err.get('error_type', 'normal')
        if err_type == 'stutter':
            stuttered.add(frame_id)
        else:
            # Unknown error types fall back to the 'normal' class (0).
            base_class[frame_id] = _ARTICULATION_CLASS_IDS.get(err_type, 0)

    labels = [0] * num_frames
    for frame_id, class_id in base_class.items():
        labels[frame_id] = class_id
    for frame_id in stuttered:
        # 0 -> 4 (stutter only); 1-3 -> 5-7 (articulation error + stutter).
        labels[frame_id] += _STUTTER_CLASS_ID
    return labels


def export_for_training(
    annotations: List[Dict[str, Any]],
    output_file: Union[str, Path] = Path("data/training_dataset.json")
) -> Dict[str, Any]:
    """Export annotations in a training-ready format with frame-level labels.

    Each sample's duration (seconds) is divided into 20 ms frames and every
    frame gets a class id: 0 normal, 1 substitution, 2 omission,
    3 distortion, 4 stutter, and 5-7 for substitution/omission/distortion
    combined with a stutter on the same frame. Errors whose ``frame_id`` lies
    outside the sample's frame range are ignored.

    Args:
        annotations: Annotation dicts with ``audio_file``, ``expected_text``,
            ``duration`` and ``phoneme_errors``. A missing or ``None``
            duration is treated as ``0.0``; missing or ``None`` errors as
            an empty list.
        output_file: Destination JSON path (``str`` or ``Path``); parent
            directories are created as needed.

    Returns:
        ``{'samples': <number of exported samples>, 'output_file': <path str>}``.

    Raises:
        TypeError, ValueError: If the data is not JSON-serializable. The
            destination file is left untouched in that case.
        OSError: If the directory or file cannot be written.
    """
    output_file = Path(output_file)
    training_data = []

    for ann in annotations:
        duration = ann.get('duration') or 0.0
        phoneme_errors = ann.get('phoneme_errors') or []

        # Clamp so a negative duration yields an empty label list rather than
        # a negative frame count.
        num_frames = max(0, int((duration * 1000) / _FRAME_MS))

        training_data.append({
            'audio_file': ann.get('audio_file'),
            'expected_text': ann.get('expected_text', ''),
            'duration': duration,
            'num_frames': num_frames,
            'frame_labels': _build_frame_labels(phoneme_errors, num_frames),
            'phoneme_errors': phoneme_errors,
        })

    # Serialize first so a failure cannot truncate an existing dataset file.
    payload = json.dumps(training_data, indent=2, ensure_ascii=False)

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(payload)

    logger.info(
        "Exported %d samples for training to %s", len(training_data), output_file
    )

    return {
        'samples': len(training_data),
        'output_file': str(output_file),
    }


if __name__ == "__main__":
    # Example usage: print statistics for the default annotations file and,
    # when it holds any annotations, export them for training.
    annotations = load_annotations()
    stats = get_annotation_statistics(annotations)
    print(f"Total samples: {stats['total_samples']}")
    print(f"Total errors: {stats['total_errors']}")
    print(f"Error types: {stats['error_types']}")

    if annotations:
        export_for_training(annotations)