Spaces:
Runtime error
Runtime error
| """ | |
| Utility functions for the Quran Transcription API | |
| """ | |
| import os | |
| import tempfile | |
| import shutil | |
| import logging | |
| from pathlib import Path | |
| from typing import Optional | |
| from fastapi import UploadFile | |
| logger = logging.getLogger(__name__) | |
def validate_audio_file(
    filename: Optional[str],
    allowed_formats: list[str]
) -> bool:
    """
    Validate audio file format by its extension.

    The comparison is case-insensitive and ignores a leading dot on either
    side, so ``"clip.MP3"`` matches an allow-list entry of ``"mp3"``,
    ``".mp3"`` or ``"MP3"``.

    Args:
        filename: Name of the file to validate (may be None or empty)
        allowed_formats: List of allowed file extensions, with or without
            a leading dot

    Returns:
        True if file format is valid, False otherwise. A missing filename
        or a filename without an extension is never valid.
    """
    if not filename:
        return False

    # Get file extension, normalized to lowercase without the dot
    ext = Path(filename).suffix.lstrip('.').lower()
    if not ext:
        # No extension can never be an allowed format, even if the
        # allow-list happens to contain an empty entry
        return False

    # Normalize the allow-list the same way as the extension so entries
    # such as ".MP3" are honoured
    normalized = {fmt.strip().lstrip('.').lower() for fmt in allowed_formats}
    return ext in normalized
def get_file_size_mb(file_path: str) -> float:
    """Return the size of the file at ``file_path`` in megabytes (1 MB = 1024 * 1024 bytes)."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes / bytes_per_mb
async def save_upload_file(
    upload_file: UploadFile,
    suffix: Optional[str] = None
) -> str:
    """
    Save uploaded file to temporary location.

    The temporary file is created with ``delete=False``, so the caller owns
    the returned path and is responsible for removing it when done.

    Note: the copy uses ``shutil.copyfileobj``, which is synchronous; no
    ``await`` happens inside this coroutine.

    Args:
        upload_file: FastAPI UploadFile object
        suffix: File suffix/extension (e.g., '.mp3'). When omitted, the
            suffix of the uploaded filename is used, falling back to '.wav'.

    Returns:
        Path to temporary file

    Raises:
        IOError: If file save fails (the original error is chained as the cause)
    """
    if not suffix:
        suffix = Path(upload_file.filename or "").suffix or ".wav"

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    temp_path = temp_file.name
    try:
        # The with-block closes (and flushes) the handle inside the try, so
        # flush errors are handled too and the handle is already closed
        # before any cleanup attempts to delete the file.
        with temp_file:
            shutil.copyfileobj(upload_file.file, temp_file)
    except Exception as e:
        logger.error(f"Error saving upload file: {e}")
        # Clean up if error occurs; a failed cleanup must not mask the
        # original error, so it is only logged.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError as cleanup_error:
            logger.warning(
                f"Failed to clean up temp file {temp_path}: {cleanup_error}"
            )
        raise IOError(f"Failed to save upload file: {str(e)}") from e

    return temp_path
def cleanup_temp_file(file_path: str) -> None:
    """
    Delete a temporary file on a best-effort basis.

    Nothing happens when the path is empty or does not exist. Any error
    raised while removing the file is logged as a warning and swallowed.

    Args:
        file_path: Path to temporary file
    """
    try:
        # Guard clauses: nothing to do for an empty or missing path
        if not file_path:
            return
        if not os.path.exists(file_path):
            return

        os.remove(file_path)
        logger.debug(f"Cleaned up temp file: {file_path}")
    except Exception as e:
        logger.warning(f"Failed to clean up temp file {file_path}: {e}")
def format_duration(seconds: float) -> str:
    """
    Format duration in seconds to human-readable format.

    The duration is rounded to the nearest millisecond before being split
    into units, so floating-point noise (e.g. ``1.001``) does not drop a
    millisecond. Negative durations are formatted by magnitude with a
    leading ``-``.

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted duration string. Milliseconds are shown only for
        durations under one minute:
        "1h 30m 45s", "1m 30s", "1s 500ms" or "250ms".
    """
    # Work in whole milliseconds to avoid float truncation errors
    total_ms = round(abs(seconds) * 1000)
    # Only show a sign when something non-zero remains after rounding
    sign = "-" if seconds < 0 and total_ms > 0 else ""

    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)

    if hours > 0:
        return f"{sign}{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{sign}{minutes}m {secs}s"
    if total_secs > 0:
        return f"{sign}{secs}s {millis}ms"
    return f"{sign}{millis}ms"
def get_model_info() -> dict:
    """Return a freshly built dict of static metadata describing the model."""
    model_id = "OdyAsh/faster-whisper-base-ar-quran"
    precisions = ["float32", "float16", "int8"]

    info = {}
    info["name"] = model_id
    info["base_model"] = "tarteel-ai/whisper-base-ar-quran"
    info["origin"] = "OpenAI Whisper (base)"
    info["language"] = "Arabic (ar)"
    info["optimized_for"] = "Quranic recitations"
    info["framework"] = "CTranslate2"
    info["quantization_options"] = precisions
    info["repository"] = "https://huggingface.co/OdyAsh/faster-whisper-base-ar-quran"
    return info
def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize filename by removing invalid characters.

    Removes ``< > : " / \\ | ? *`` and ASCII control characters (including
    NUL), replaces spaces with underscores, and limits the length. When the
    name has to be shortened, the stem is cut so the extension is kept
    whenever it fits within ``max_length``.

    Args:
        filename: Original filename
        max_length: Maximum length for filename (negative values are
            treated as 0)

    Returns:
        Sanitized filename, or "audio" if nothing usable remains (empty
        result or a name made only of dots such as "." or "..")
    """
    import re

    # Remove special characters and control characters (NUL, newlines, ...)
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename)

    # Replace spaces with underscores
    sanitized = sanitized.replace(' ', '_')

    # Limit length; a negative limit would otherwise slice from the end
    limit = max(max_length, 0)
    if len(sanitized) > limit:
        stem, ext = os.path.splitext(sanitized)
        if 0 < len(ext) < limit:
            # Keep the extension so the file type stays recognizable
            sanitized = stem[:limit - len(ext)] + ext
        else:
            sanitized = sanitized[:limit]

    # Ensure not empty and not a dot-only name ("." / ".." are directory
    # references, not usable filenames)
    if not sanitized.strip('.'):
        sanitized = "audio"

    return sanitized