"""
Utility functions for the Quran Transcription API
"""

import logging
import os
import re
import shutil
import tempfile
from pathlib import Path
from typing import Optional

from fastapi import UploadFile

logger = logging.getLogger(__name__)

# Characters stripped from file names: punctuation reserved on Windows, both
# path separators, and ASCII control characters (including NUL).
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def validate_audio_file(
    filename: Optional[str],
    allowed_formats: list[str]
) -> bool:
    """
    Validate audio file format.

    The file extension and every entry of ``allowed_formats`` are compared
    after lower-casing and dropping a leading dot (entries are also stripped
    of surrounding whitespace), so ``"mp3"``, ``".mp3"`` and ``"MP3"`` are
    treated as the same format.

    Args:
        filename: Name of the file to validate
        allowed_formats: List of allowed file extensions

    Returns:
        True if file format is valid, False otherwise
    """
    if not filename:
        return False

    # Get file extension (without the dot, lower-cased)
    ext = Path(filename).suffix.lstrip('.').lower()

    # Normalize the allow-list the same way so the comparison is symmetric
    normalized = {fmt.strip().lstrip('.').lower() for fmt in allowed_formats}
    return ext in normalized


def get_file_size_mb(file_path: str) -> float:
    """
    Get file size in megabytes.

    Args:
        file_path: Path of the file to measure

    Returns:
        File size in units of 1024 * 1024 bytes

    Raises:
        OSError: If the file does not exist or is inaccessible
    """
    return os.path.getsize(file_path) / (1024 * 1024)


async def save_upload_file(
    upload_file: UploadFile,
    suffix: Optional[str] = None
) -> str:
    """
    Save uploaded file to temporary location.

    The temporary file is created with ``delete=False``; the caller owns it
    and should remove it with ``cleanup_temp_file`` when done.

    Args:
        upload_file: FastAPI UploadFile object
        suffix: File suffix/extension (e.g., '.mp3'). When omitted, the
            suffix of the uploaded file name is used, falling back to '.wav'

    Returns:
        Path to temporary file

    Raises:
        IOError: If file save fails
    """
    if not suffix:
        suffix = Path(upload_file.filename or "").suffix or ".wav"

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    temp_path = temp_file.name

    try:
        # The context manager closes the handle on success and on failure,
        # so the file is never still open when it gets removed below
        # (removing an open file fails on Windows).
        # NOTE: copyfileobj is a synchronous copy.
        with temp_file:
            shutil.copyfileobj(upload_file.file, temp_file)
    except Exception as e:
        logger.error(f"Error saving upload file: {e}")
        # Best-effort removal of the partial file; cleanup_temp_file never
        # raises, so it cannot mask the original error.
        cleanup_temp_file(temp_path)
        raise IOError(f"Failed to save upload file: {str(e)}") from e

    return temp_path


def cleanup_temp_file(file_path: str) -> None:
    """
    Remove temporary file.

    Best-effort: a missing or empty path is ignored, and any failure is
    logged as a warning instead of being raised.

    Args:
        file_path: Path to temporary file
    """
    try:
        if file_path and os.path.exists(file_path):
            os.remove(file_path)
            logger.debug(f"Cleaned up temp file: {file_path}")
    except Exception as e:
        logger.warning(f"Failed to clean up temp file {file_path}: {e}")


def format_duration(seconds: float) -> str:
    """
    Format duration in seconds to human-readable format.

    The duration is rounded to the nearest millisecond before it is split
    into units, so binary floating-point error cannot drop a millisecond
    (e.g. 1.001 is rendered as "1s 1ms"). Negative durations are rendered
    as the formatted absolute value with a leading "-".

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted duration string (e.g., "1h 30m 45s")
    """
    total_ms = round(abs(seconds) * 1000)
    # No sign for values that round to zero, to avoid printing "-0ms"
    sign = "-" if seconds < 0 and total_ms else ""

    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)

    if hours > 0:
        text = f"{hours}h {minutes}m {secs}s"
    elif minutes > 0:
        text = f"{minutes}m {secs}s"
    elif total_secs >= 1:
        text = f"{secs}s {millis}ms"
    else:
        text = f"{millis}ms"

    return sign + text


def get_model_info() -> dict:
    """Get information about the loaded model"""
    return {
        "name": "OdyAsh/faster-whisper-base-ar-quran",
        "base_model": "tarteel-ai/whisper-base-ar-quran",
        "origin": "OpenAI Whisper (base)",
        "language": "Arabic (ar)",
        "optimized_for": "Quranic recitations",
        "framework": "CTranslate2",
        "quantization_options": ["float32", "float16", "int8"],
        "repository": "https://huggingface.co/OdyAsh/faster-whisper-base-ar-quran"
    }


def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize filename by removing invalid characters.

    Removes the characters ``< > : " / \\ | ? *`` and ASCII control
    characters, replaces spaces with underscores, and truncates the result
    to ``max_length`` characters. If nothing usable remains (empty string,
    "." or ".."), the placeholder "audio" is returned.

    Args:
        filename: Original filename
        max_length: Maximum length for filename

    Returns:
        Sanitized filename
    """
    # Remove special characters
    sanitized = _INVALID_FILENAME_CHARS.sub('', filename)

    # Replace spaces with underscores
    sanitized = sanitized.replace(' ', '_')

    # Limit length
    sanitized = sanitized[:max_length]

    # Ensure the result is a usable name: not empty, and not one of the
    # special directory entries "." / ".."
    if sanitized in ("", ".", ".."):
        sanitized = "audio"

    return sanitized