# Provenance (scraped page header, commented out so the module parses):
#   zlaqa-version-c-ai-enginee / scripts/annotation_helper.py
#   anfastech — "New: Phoneme-level speech pathology diagnosis MVP with real-time streaming" (1cd6149)
"""
Annotation Helper Utilities
Helper functions for phoneme-level annotation tasks.
"""
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
import numpy as np
logger = logging.getLogger(__name__)
def load_annotations(annotations_file: Path = Path("data/annotations.json")) -> List[Dict[str, Any]]:
    """Load phoneme annotations from a JSON file.

    Args:
        annotations_file: Path to the annotations JSON file.

    Returns:
        The parsed list of annotation dicts, or an empty list when the
        file is missing, unreadable, or contains invalid JSON (the
        failure is logged, never raised — callers treat this as
        best-effort loading).
    """
    if not annotations_file.exists():
        logger.warning("Annotations file not found: %s", annotations_file)
        return []
    try:
        with open(annotations_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from a bare `except Exception`: only I/O and parse
        # failures are expected here; anything else should surface.
        logger.error("Failed to load annotations: %s", e)
        return []
def save_annotations(annotations: List[Dict[str, Any]], annotations_file: Path = Path("data/annotations.json")):
    """Write the annotation list to *annotations_file* as pretty-printed JSON.

    Parent directories are created on demand, so callers may point at a
    location that does not exist yet.
    """
    annotations_file.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(annotations, indent=2, ensure_ascii=False)
    annotations_file.write_text(payload, encoding='utf-8')
    logger.info(f"Saved {len(annotations)} annotations to {annotations_file}")
def get_annotation_statistics(annotations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise error counts across a list of annotation dicts.

    Returns a dict with the sample/error totals, per-error-type counts,
    per-phoneme error counts, and the mean error count per sample
    (0.0 when there are no samples).
    """
    sample_count = len(annotations)
    error_total = sum(item.get('total_errors', 0) for item in annotations)
    # Seed the known categories so each appears in the result even when
    # no error of that type was recorded.
    type_counts: Dict[str, int] = {
        'substitution': 0,
        'omission': 0,
        'distortion': 0,
        'stutter': 0,
        'normal': 0,
    }
    per_phoneme: Dict[str, int] = {}
    for item in annotations:
        for error in item.get('phoneme_errors', []):
            kind = error.get('error_type', 'normal')
            type_counts[kind] = type_counts.get(kind, 0) + 1
            symbol = error.get('phoneme', 'unknown')
            per_phoneme[symbol] = per_phoneme.get(symbol, 0) + 1
    mean_errors = error_total / sample_count if sample_count else 0.0
    return {
        'total_samples': sample_count,
        'total_errors': error_total,
        'error_types': type_counts,
        'phoneme_errors': per_phoneme,
        'avg_errors_per_sample': mean_errors,
    }
def export_for_training(
    annotations: List[Dict[str, Any]],
    output_file: Path = Path("data/training_dataset.json")
) -> Dict[str, Any]:
    """Export annotations in a training-ready frame-labelled format.

    Builds one label per 20 ms frame for each annotation, using an
    8-class scheme: 0=normal, 1=substitution, 2=omission, 3=distortion,
    4=stutter, and 5-7 for an articulation error (classes 1-3) that
    co-occurs with a stutter on the same frame.

    Args:
        annotations: Annotation dicts; each is expected to carry
            'audio_file', 'expected_text', 'duration' (seconds) and a
            'phoneme_errors' list with 'frame_id'/'error_type' entries.
        output_file: Destination JSON file; parent dirs are created.

    Returns:
        Dict with the exported sample count and the output path string.
    """
    # Base class ids for each error type (stutter-combined ids are +4).
    base_class = {
        'normal': 0,
        'substitution': 1,
        'omission': 2,
        'distortion': 3,
        'stutter': 4,
    }
    training_data = []
    for ann in annotations:
        audio_file = ann.get('audio_file')
        expected_text = ann.get('expected_text', '')
        duration = ann.get('duration', 0.0)
        errors = ann.get('phoneme_errors', [])
        # One label per 20ms frame; 0 = normal.
        num_frames = int((duration * 1000) / 20)
        frame_labels = [0] * num_frames
        # Hoisted out of the per-error loop: the set of frames carrying a
        # stutter. The original rescanned the whole error list with
        # any(...) for every error — O(n^2) per annotation.
        stutter_frames = {
            e.get('frame_id') for e in errors
            if e.get('error_type') == 'stutter'
        }
        for err in errors:
            frame_id = err.get('frame_id', 0)
            err_type = err.get('error_type', 'normal')
            class_id = base_class.get(err_type, 0)
            # Articulation error co-occurring with a stutter -> classes 5-7.
            if err_type not in ('normal', 'stutter') and frame_id in stutter_frames:
                class_id += 4
            # Out-of-range frame ids are silently dropped; later errors on
            # the same frame overwrite earlier ones (original behavior).
            if 0 <= frame_id < num_frames:
                frame_labels[frame_id] = class_id
        training_data.append({
            'audio_file': audio_file,
            'expected_text': expected_text,
            'duration': duration,
            'num_frames': num_frames,
            'frame_labels': frame_labels,
            'phoneme_errors': errors
        })
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(training_data, f, indent=2, ensure_ascii=False)
    logger.info(f"Exported {len(training_data)} samples for training to {output_file}")
    return {
        'samples': len(training_data),
        'output_file': str(output_file)
    }
if __name__ == "__main__":
    # Example usage: print annotation stats, then export when any exist.
    loaded = load_annotations()
    summary = get_annotation_statistics(loaded)
    print(f"Total samples: {summary['total_samples']}")
    print(f"Total errors: {summary['total_errors']}")
    print(f"Error types: {summary['error_types']}")
    if loaded:
        export_for_training(loaded)