# Provenance (scraped page header, commented out so the module parses):
#   zlaqa-version-c-ai-enginee / scripts/annotation_helper.py
#   anfastech — "New: Phoneme-level speech pathology diagnosis MVP with real-time streaming" (1cd6149)
"""
Annotation Helper Utilities
Helper functions for phoneme-level annotation tasks.
"""
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
import numpy as np
logger = logging.getLogger(__name__)
def load_annotations(annotations_file: Path = Path("data/annotations.json")) -> List[Dict[str, Any]]:
    """Load phoneme annotations from a JSON file.

    Args:
        annotations_file: Path to the annotations JSON file.

    Returns:
        The parsed list of annotation dicts, or an empty list when the
        file is missing, unreadable, or contains invalid JSON (the
        failure is logged, never raised — callers treat this as
        best-effort loading).
    """
    if not annotations_file.exists():
        logger.warning("Annotations file not found: %s", annotations_file)
        return []
    try:
        with open(annotations_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from a bare `except Exception`: only I/O and parse
        # failures are expected here; anything else should surface.
        logger.error("Failed to load annotations: %s", e)
        return []
def save_annotations(annotations: List[Dict[str, Any]], annotations_file: Path = Path("data/annotations.json")):
    """Write the annotation list to *annotations_file* as pretty-printed JSON.

    Parent directories are created on demand, so callers may point at a
    location that does not exist yet.
    """
    annotations_file.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(annotations, indent=2, ensure_ascii=False)
    annotations_file.write_text(payload, encoding='utf-8')
    logger.info(f"Saved {len(annotations)} annotations to {annotations_file}")
def get_annotation_statistics(annotations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise error counts across a list of annotation dicts.

    Returns a dict with the sample/error totals, per-error-type counts,
    per-phoneme error counts, and the mean error count per sample
    (0.0 when there are no samples).
    """
    sample_count = len(annotations)
    error_total = sum(item.get('total_errors', 0) for item in annotations)
    # Seed the known categories so each appears in the result even when
    # no error of that type was recorded.
    type_counts: Dict[str, int] = {
        'substitution': 0,
        'omission': 0,
        'distortion': 0,
        'stutter': 0,
        'normal': 0,
    }
    per_phoneme: Dict[str, int] = {}
    for item in annotations:
        for error in item.get('phoneme_errors', []):
            kind = error.get('error_type', 'normal')
            type_counts[kind] = type_counts.get(kind, 0) + 1
            symbol = error.get('phoneme', 'unknown')
            per_phoneme[symbol] = per_phoneme.get(symbol, 0) + 1
    mean_errors = error_total / sample_count if sample_count else 0.0
    return {
        'total_samples': sample_count,
        'total_errors': error_total,
        'error_types': type_counts,
        'phoneme_errors': per_phoneme,
        'avg_errors_per_sample': mean_errors,
    }
def export_for_training(
    annotations: List[Dict[str, Any]],
    output_file: Path = Path("data/training_dataset.json")
) -> Dict[str, Any]:
    """Export annotations in a training-ready frame-labelled format.

    Builds one label per 20 ms frame for each annotation, using an
    8-class scheme: 0=normal, 1=substitution, 2=omission, 3=distortion,
    4=stutter, and 5-7 for an articulation error (classes 1-3) that
    co-occurs with a stutter on the same frame.

    Args:
        annotations: Annotation dicts; each is expected to carry
            'audio_file', 'expected_text', 'duration' (seconds) and a
            'phoneme_errors' list with 'frame_id'/'error_type' entries.
        output_file: Destination JSON file; parent dirs are created.

    Returns:
        Dict with the exported sample count and the output path string.
    """
    # Base class ids for each error type (stutter-combined ids are +4).
    base_class = {
        'normal': 0,
        'substitution': 1,
        'omission': 2,
        'distortion': 3,
        'stutter': 4,
    }
    training_data = []
    for ann in annotations:
        audio_file = ann.get('audio_file')
        expected_text = ann.get('expected_text', '')
        duration = ann.get('duration', 0.0)
        errors = ann.get('phoneme_errors', [])
        # One label per 20ms frame; 0 = normal.
        num_frames = int((duration * 1000) / 20)
        frame_labels = [0] * num_frames
        # Hoisted out of the per-error loop: the set of frames carrying a
        # stutter. The original rescanned the whole error list with
        # any(...) for every error — O(n^2) per annotation.
        stutter_frames = {
            e.get('frame_id') for e in errors
            if e.get('error_type') == 'stutter'
        }
        for err in errors:
            frame_id = err.get('frame_id', 0)
            err_type = err.get('error_type', 'normal')
            class_id = base_class.get(err_type, 0)
            # Articulation error co-occurring with a stutter -> classes 5-7.
            if err_type not in ('normal', 'stutter') and frame_id in stutter_frames:
                class_id += 4
            # Out-of-range frame ids are silently dropped; later errors on
            # the same frame overwrite earlier ones (original behavior).
            if 0 <= frame_id < num_frames:
                frame_labels[frame_id] = class_id
        training_data.append({
            'audio_file': audio_file,
            'expected_text': expected_text,
            'duration': duration,
            'num_frames': num_frames,
            'frame_labels': frame_labels,
            'phoneme_errors': errors
        })
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(training_data, f, indent=2, ensure_ascii=False)
    logger.info(f"Exported {len(training_data)} samples for training to {output_file}")
    return {
        'samples': len(training_data),
        'output_file': str(output_file)
    }
if __name__ == "__main__":
    # Example usage: print annotation stats, then export when any exist.
    loaded = load_annotations()
    summary = get_annotation_statistics(loaded)
    print(f"Total samples: {summary['total_samples']}")
    print(f"Total errors: {summary['total_errors']}")
    print(f"Error types: {summary['error_types']}")
    if loaded:
        export_for_training(loaded)