|
|
""" |
|
|
Data validation utilities for the Medical Transcriber application.

Provides validation functions for audio files, free text, patient names,
dates, API keys, and file-system paths.
|
|
""" |
|
|
|
|
|
from pathlib import Path |
|
|
from typing import Tuple, Optional |
|
|
|
|
|
from .constants import AudioFormats, ValidationRules |
|
|
from .exceptions import ValidationException, AudioFileException |
|
|
from .logger import get_logger |
|
|
|
|
|
# Module-level logger used by every Validator method below.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
class Validator:
    """Centralized validation utility.

    Every validator is a static method: it logs what it is checking and
    either returns the (normalized) value or raises a project exception.
    """

    @staticmethod
    def validate_audio_file(file_path: str) -> Path:
        """
        Validate audio file existence and format.

        Args:
            file_path: Path to audio file

        Returns:
            Validated Path object

        Raises:
            AudioFileException: If the file doesn't exist, is not a regular
                file, has an unsupported extension, cannot be inspected,
                or is empty
            ValidationException: If file path is empty
        """
        logger.debug(f"Validating audio file: {file_path}")

        if not file_path:
            logger.error("Audio file path is required")
            raise ValidationException("audio_file", "", "Audio file path is required")

        audio_path = Path(file_path)

        if not audio_path.exists():
            logger.error(f"Audio file not found: {audio_path}")
            raise AudioFileException(str(audio_path), "File does not exist")

        if not audio_path.is_file():
            logger.error(f"Path is not a file: {audio_path}")
            raise AudioFileException(str(audio_path), "Path is not a file")

        # Extension comparison is case-insensitive (".WAV" is treated as ".wav").
        if audio_path.suffix.lower() not in AudioFormats.SUPPORTED_EXTENSIONS:
            logger.error(f"Unsupported audio format: {audio_path.suffix}")
            raise AudioFileException(
                str(audio_path),
                f"Unsupported format. Supported: {', '.join(AudioFormats.SUPPORTED_EXTENSIONS)}"
            )

        # Stat once and guard it: the file can vanish or become unreadable
        # between the checks above and this call, and callers are promised
        # an AudioFileException rather than a raw OSError.
        try:
            size_bytes = audio_path.stat().st_size
        except OSError as exc:
            logger.error(f"Cannot access audio file: {audio_path} ({exc})")
            raise AudioFileException(str(audio_path), f"Cannot access file: {exc}") from exc

        if size_bytes == 0:
            logger.error(f"Audio file is empty: {audio_path}")
            raise AudioFileException(str(audio_path), "File is empty")

        logger.info(f"✓ Audio file validated: {audio_path} ({size_bytes} bytes)")

        return audio_path

    @staticmethod
    def validate_text(text: str, field_name: str = "text") -> str:
        """
        Validate text content.

        The length limits are applied to the text after surrounding
        whitespace has been removed, i.e. to the value that is returned.

        Args:
            text: Text to validate
            field_name: Name of the field for error messages

        Returns:
            The text with leading and trailing whitespace removed

        Raises:
            ValidationException: If text is missing, not a string, blank,
                shorter than ValidationRules.MIN_TEXT_LENGTH or longer than
                ValidationRules.MAX_TEXT_LENGTH
        """
        # Check for missing/blank input before calling len(): None (or any
        # non-string) must surface as a ValidationException, not a TypeError.
        if not isinstance(text, str) or not text.strip():
            logger.error(f"Text field '{field_name}' cannot be empty")
            raise ValidationException(field_name, "", "Text cannot be empty")

        cleaned = text.strip()
        length = len(cleaned)
        logger.debug(f"Validating text field '{field_name}': {length} chars")

        if length < ValidationRules.MIN_TEXT_LENGTH:
            logger.error(
                f"Text field '{field_name}' is too short "
                f"({length} < {ValidationRules.MIN_TEXT_LENGTH})"
            )
            raise ValidationException(
                field_name,
                cleaned,
                f"Text must be at least {ValidationRules.MIN_TEXT_LENGTH} characters"
            )

        if length > ValidationRules.MAX_TEXT_LENGTH:
            logger.error(
                f"Text field '{field_name}' is too long "
                f"({length} > {ValidationRules.MAX_TEXT_LENGTH})"
            )
            raise ValidationException(
                field_name,
                cleaned[:50],  # only a short preview of the oversized value
                f"Text exceeds maximum length of {ValidationRules.MAX_TEXT_LENGTH} characters"
            )

        logger.info(f"✓ Text field '{field_name}' validated: {length} chars")
        return cleaned

    @staticmethod
    def validate_patient_name(name: Optional[str]) -> Optional[str]:
        """
        Validate patient name.

        Args:
            name: Patient name (optional)

        Returns:
            The name with surrounding whitespace removed, or None when no
            name was supplied

        Raises:
            ValidationException: If the stripped name is shorter than 3
                characters or contains anything other than letters,
                whitespace and hyphens
        """
        # NOTE(review): the patient name is written to the log at several
        # levels below; confirm this is acceptable for this deployment.
        logger.debug(f"Validating patient name: {name}")

        if not name:
            logger.debug("Patient name is optional, skipping validation")
            return None

        name = name.strip()

        if len(name) < 3:
            logger.error(f"Patient name too short: '{name}' ({len(name)} < 3)")
            raise ValidationException(
                "patient_name",
                name,
                "Patient name must be at least 3 characters"
            )

        # str.isalpha() is Unicode-aware, so non-ASCII letters are accepted.
        if not all(c.isalpha() or c.isspace() or c == '-' for c in name):
            logger.error(f"Patient name contains invalid characters: '{name}'")
            raise ValidationException(
                "patient_name",
                name,
                "Patient name can only contain letters, spaces, and hyphens"
            )

        logger.info(f"✓ Patient name validated: '{name}'")
        return name

    @staticmethod
    def validate_date(date_str: Optional[str], date_format: str = "%d.%m.%Y") -> Optional[str]:
        """
        Validate date format.

        Args:
            date_str: Date string to validate (optional)
            date_format: Expected date format, as a strptime pattern

        Returns:
            The date string with surrounding whitespace removed, or None
            when no date was supplied

        Raises:
            ValidationException: If the string cannot be parsed with
                date_format
        """
        from datetime import datetime

        logger.debug(f"Validating date: '{date_str}' (format: {date_format})")

        if not date_str:
            logger.debug("Date is optional, skipping validation")
            return None

        date_str = date_str.strip()

        # Only the parse can raise ValueError, so keep the try body minimal.
        try:
            datetime.strptime(date_str, date_format)
        except ValueError as exc:
            logger.error(f"Invalid date format: '{date_str}' (expected: {date_format})")
            raise ValidationException(
                "date",
                date_str,
                f"Invalid date format. Expected: {date_format}"
            ) from exc

        logger.info(f"✓ Date validated: '{date_str}'")
        return date_str

    @staticmethod
    def validate_api_key(api_key: Optional[str]) -> Optional[str]:
        """
        Validate API key format.

        The key itself is never logged or placed in the exception; only
        its length is reported.

        Args:
            api_key: API key string (optional)

        Returns:
            The key with surrounding whitespace removed, or None when no
            key was supplied

        Raises:
            ValidationException: If the stripped key is shorter than 10
                characters
        """
        logger.debug("Validating API key (hidden for security)")

        if not api_key:
            logger.debug("API key is optional, skipping validation")
            return None

        api_key = api_key.strip()

        if len(api_key) < 10:
            logger.error("API key seems too short to be valid")
            raise ValidationException(
                "api_key",
                "***",  # masked so the key never reaches error output
                "API key seems too short to be valid"
            )

        logger.info(f"✓ API key validated ({len(api_key)} chars)")
        return api_key

    @staticmethod
    def validate_file_path(path_str: str, must_exist: bool = False) -> Path:
        """
        Validate file or directory path.

        Args:
            path_str: Path string
            must_exist: Whether path must exist

        Returns:
            The resolved (absolute) Path object

        Raises:
            ValidationException: If the path is empty, cannot be resolved,
                or does not exist while must_exist is True
        """
        logger.debug(f"Validating file path: {path_str} (must_exist={must_exist})")

        if not path_str:
            logger.error("Path cannot be empty")
            raise ValidationException("path", "", "Path cannot be empty")

        # Only the file-system calls live inside the try. The "does not
        # exist" error is raised afterwards so it can never be caught and
        # re-wrapped as "Invalid path" should ValidationException derive
        # from ValueError (its definition is not visible in this module).
        try:
            path = Path(path_str).resolve()
            missing = must_exist and not path.exists()
        except (ValueError, OSError) as exc:
            logger.error(f"Invalid path: {path_str} ({exc})")
            raise ValidationException("path", path_str, f"Invalid path: {str(exc)}") from exc

        if missing:
            logger.error(f"Path does not exist: {path}")
            raise ValidationException(
                "path",
                str(path),
                "Path does not exist"
            )

        logger.info(f"✓ File path validated: {path}")

        return path
|
|
|