|
|
""" |
|
|
Data structures for the Medical Transcriber application.

Defines typed dataclasses for configuration, results, and metadata.
|
|
""" |
|
|
|
|
|
from dataclasses import dataclass, field |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
from typing import Optional, List, Dict, Any |
|
|
|
|
|
|
|
|
@dataclass
class PatientMetadata:
    """Patient and study details attached to a transcription.

    Every field is optional and defaults to ``None``.
    """

    name: Optional[str] = None
    date_of_birth: Optional[str] = None
    study_area: Optional[str] = None
    study_number: Optional[str] = None
    study_date: Optional[str] = None
    doctor_name: Optional[str] = None

    def is_complete(self) -> bool:
        """Return True when name, date of birth and study area are all filled.

        A string value counts as filled only if it is non-empty after
        stripping whitespace, so a blank value such as ``"  "`` is treated
        as missing. Non-string values fall back to plain truthiness.
        """
        required = (self.name, self.date_of_birth, self.study_area)
        return all(
            bool(value.strip()) if isinstance(value, str) else bool(value)
            for value in required
        )

    def to_dict(self) -> Dict[str, Optional[str]]:
        """Return all six fields as a plain dictionary keyed by field name."""
        return {
            "name": self.name,
            "date_of_birth": self.date_of_birth,
            "study_area": self.study_area,
            "study_number": self.study_number,
            "study_date": self.study_date,
            "doctor_name": self.doctor_name,
        }
|
|
|
|
|
|
|
|
@dataclass
class TranscriptionResult:
    """Result of the transcription process for one audio file.

    Holds the original text, the optional corrected text, and the list of
    corrections together with their count.
    """

    timestamp: datetime
    audio_file: Path
    original_text: str
    corrected_text: Optional[str] = None
    corrections: List[Dict[str, str]] = field(default_factory=list)
    corrections_count: int = 0

    def __post_init__(self) -> None:
        """Derive ``corrections_count`` when only ``corrections`` was given.

        An explicitly supplied non-zero count is left untouched; the count
        is filled in only when it is still at its default of 0 and the
        corrections list is non-empty, so the two fields do not disagree
        right after construction.
        """
        if self.corrections_count == 0 and self.corrections:
            self.corrections_count = len(self.corrections)

    def has_corrections(self) -> bool:
        """Return True when corrected text is set and corrections are listed."""
        return self.corrected_text is not None and len(self.corrections) > 0
|
|
|
|
|
|
|
|
@dataclass
class PipelineStepResult:
    """Outcome record for one step of the pipeline.

    Only ``step_name`` and ``status`` are required; the remaining fields
    default to empty or ``None`` values.
    """

    step_name: str
    status: str
    duration: float = 0.0
    message: str = ""
    output_length: Optional[int] = None
    error: Optional[str] = None

    def is_successful(self) -> bool:
        """Return True only when ``status`` equals the string "success"."""
        succeeded = self.status == "success"
        return succeeded
|
|
|
|
|
|
|
|
@dataclass
class PipelineResult:
    """Complete result of processing one audio file through the pipeline.

    Aggregates the optional patient metadata, transcription and report
    path with the per-step results and an overall status string, which
    starts as "pending".
    """

    timestamp: datetime
    audio_file: Path
    patient_data: Optional[PatientMetadata] = None
    transcription: Optional[TranscriptionResult] = None
    report_path: Optional[Path] = None
    steps: List[PipelineStepResult] = field(default_factory=list)
    status: str = "pending"
    error_message: Optional[str] = None

    def is_successful(self) -> bool:
        """Return True only when ``status`` equals the string "success"."""
        return self.status == "success"

    def get_total_duration(self) -> float:
        """Return the sum of ``duration`` over all recorded steps."""
        return sum(step.duration for step in self.steps)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the result to a dictionary for JSON serialization.

        The timestamp is rendered with ``isoformat()`` and paths with
        ``str()``. Missing patient data, transcription or report path are
        emitted as ``None``. Each step entry also carries its
        ``output_length`` and ``error`` so that failure details are not
        lost in the serialized form.
        """
        transcription = None
        if self.transcription:
            transcription = {
                "original": self.transcription.original_text,
                "corrected": self.transcription.corrected_text,
                "corrections_count": self.transcription.corrections_count,
            }

        steps = [
            {
                "step": step.step_name,
                "status": step.status,
                "duration": step.duration,
                "message": step.message,
                "output_length": step.output_length,
                "error": step.error,
            }
            for step in self.steps
        ]

        return {
            "timestamp": self.timestamp.isoformat(),
            "audio_file": str(self.audio_file),
            "patient_data": self.patient_data.to_dict() if self.patient_data else None,
            "transcription": transcription,
            "report_path": str(self.report_path) if self.report_path else None,
            "steps": steps,
            "status": self.status,
            "total_duration": self.get_total_duration(),
            "error": self.error_message,
        }
|
|
|
|
|
|
|
|
@dataclass
class CorrectionChange:
    """One individual change made while correcting a text.

    ``original`` and ``corrected`` are required; ``position``,
    ``change_type`` and ``confidence`` default to 0, "substitution" and
    1.0 respectively.
    """

    original: str
    corrected: str
    position: int = 0
    change_type: str = "substitution"
    confidence: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the change as a dictionary; ``change_type`` is keyed "type"."""
        return dict(
            original=self.original,
            corrected=self.corrected,
            type=self.change_type,
            position=self.position,
            confidence=self.confidence,
        )
|
|
|
|
|
|
|
|
@dataclass
class ModelInfo:
    """Descriptive record of a loaded model.

    ``language`` defaults to "russian", ``cuda_available`` to False and
    ``cuda_device`` to ``None``.
    """

    model_name: str
    model_path: Path
    device: str
    dtype: str
    language: str = "russian"
    cuda_available: bool = False
    cuda_device: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dictionary, with ``model_path`` as a string."""
        field_names = (
            "model_name",
            "model_path",
            "device",
            "dtype",
            "language",
            "cuda_available",
            "cuda_device",
        )
        serialized: Dict[str, Any] = {key: getattr(self, key) for key in field_names}
        # Replace the Path object with its string form; key order is unchanged.
        serialized["model_path"] = str(self.model_path)
        return serialized
|
|
|
|
|
|
|
|
@dataclass
class TermValidationResult:
    """Result of medical term validation.

    Stores the total number of terms found, a per-category count mapping,
    the list of matched terms, and the validation time.
    """

    total_terms_found: int
    terms_by_category: Dict[str, int] = field(default_factory=dict)
    matched_terms: List[str] = field(default_factory=list)
    validation_time: float = 0.0

    def get_total_categories(self) -> int:
        """Return the number of entries in ``terms_by_category``.

        Every key is counted, including any whose stored count is zero.
        """
        return len(self.terms_by_category)

    def to_dict(self) -> Dict[str, Any]:
        """Return the totals, category counts and validation time as a dict.

        The category mapping is emitted under the key "categories" as a
        shallow copy, so mutating the returned dictionary does not alter
        this instance. ``matched_terms`` is not included.
        """
        return {
            "total_terms_found": self.total_terms_found,
            "categories": dict(self.terms_by_category),
            "validation_time": self.validation_time,
        }
|
|
|