Spaces:
Runtime error
Runtime error
File size: 4,001 Bytes
991ca47 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | """
Utility functions for the Quran Transcription API
"""
import os
import tempfile
import shutil
import logging
from pathlib import Path
from typing import Optional
from fastapi import UploadFile
logger = logging.getLogger(__name__)
def validate_audio_file(
    filename: Optional[str],
    allowed_formats: list[str]
) -> bool:
    """
    Validate audio file format by its extension.

    The comparison is case-insensitive and ignores a leading dot on both
    sides, so ``"clip.MP3"`` matches any of ``"mp3"``, ``".mp3"`` or ``"MP3"``.

    Args:
        filename: Name of the file to validate (may be None or empty)
        allowed_formats: Allowed file extensions, with or without a leading dot

    Returns:
        True if the file extension is in the allowed list, False otherwise
    """
    if not filename:
        return False

    # Only the last suffix counts: "a.tar.gz" is checked as "gz".
    ext = Path(filename).suffix.lstrip('.').lower()

    # Normalize the allow-list the same way as the extension so that entries
    # such as ".mp3" or "MP3" are not silently ignored.
    normalized = {fmt.strip().lstrip('.').lower() for fmt in allowed_formats}
    return ext in normalized
def get_file_size_mb(file_path: str) -> float:
    """
    Return the size of a file expressed in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        file_path: Path to the file to measure

    Returns:
        File size in megabytes
    """
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.stat(file_path).st_size
    return size_in_bytes / bytes_per_mb
async def save_upload_file(
    upload_file: UploadFile,
    suffix: Optional[str] = None
) -> str:
    """
    Save uploaded file to temporary location.

    The temporary file is created with ``delete=False``, so the caller owns
    it and is responsible for removing it (see ``cleanup_temp_file``).

    Args:
        upload_file: FastAPI UploadFile object
        suffix: File suffix/extension (e.g., '.mp3'). When omitted, the suffix
            of the uploaded filename is used, falling back to '.wav'.

    Returns:
        Path to temporary file

    Raises:
        IOError: If file save fails (the original error is chained as the cause)
    """
    if not suffix:
        suffix = Path(upload_file.filename or "").suffix or ".wav"

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # Leaving the `with` block flushes and closes the handle, so the file
        # is closed before it is either returned or deleted.
        with temp_file:
            shutil.copyfileobj(upload_file.file, temp_file)
    except Exception as e:
        logger.error(f"Error saving upload file: {e}")
        # Best-effort removal of the partial file. The handle is already
        # closed here (required on Windows), and a cleanup failure is only
        # logged so it cannot mask the original error.
        cleanup_temp_file(temp_file.name)
        raise IOError(f"Failed to save upload file: {str(e)}") from e
    return temp_file.name
def cleanup_temp_file(file_path: str) -> None:
    """
    Delete a temporary file on a best-effort basis.

    Nothing happens when the path is empty or does not exist. Any error
    raised while deleting is logged as a warning and not propagated.

    Args:
        file_path: Path to temporary file
    """
    try:
        # Guard clause: nothing to do for an empty or missing path.
        if not file_path or not os.path.exists(file_path):
            return
        os.remove(file_path)
        logger.debug(f"Cleaned up temp file: {file_path}")
    except Exception as exc:
        logger.warning(f"Failed to clean up temp file {file_path}: {exc}")
def format_duration(seconds: float) -> str:
    """
    Format duration in seconds to human-readable format.

    The duration is rounded to the nearest millisecond before being split
    into components, which avoids float artifacts (e.g. 2.3 s is reported as
    "2s 300ms" rather than "2s 299ms"). Negative durations are formatted by
    magnitude with a leading "-".

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted duration string (e.g., "1h 30m 45s", "1m 5s",
        "2s 300ms" or "450ms")
    """
    # Work in integer milliseconds so every component is exact.
    total_ms = round(abs(seconds) * 1000)
    total_secs, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)

    if hours > 0:
        text = f"{hours}h {minutes}m {secs}s"
    elif minutes > 0:
        text = f"{minutes}m {secs}s"
    elif secs > 0:
        text = f"{secs}s {millis}ms"
    else:
        text = f"{millis}ms"

    # Only show a sign when the rounded magnitude is non-zero ("-0ms" is noise).
    if seconds < 0 and total_ms > 0:
        return f"-{text}"
    return text
def get_model_info() -> dict:
    """
    Describe the transcription model used by the API.

    Returns:
        A newly built dictionary of static model metadata (name, base model,
        origin, language, target domain, framework, quantization options
        and repository URL)
    """
    model_info = dict(
        name="OdyAsh/faster-whisper-base-ar-quran",
        base_model="tarteel-ai/whisper-base-ar-quran",
        origin="OpenAI Whisper (base)",
        language="Arabic (ar)",
        optimized_for="Quranic recitations",
        framework="CTranslate2",
        quantization_options=["float32", "float16", "int8"],
        repository="https://huggingface.co/OdyAsh/faster-whisper-base-ar-quran",
    )
    return model_info
def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize filename by removing invalid characters.

    Removes characters that are invalid or dangerous in file names
    (``< > : " / \\ | ? *`` plus NUL and other ASCII control characters),
    replaces spaces with underscores and truncates to ``max_length``
    characters. If nothing usable remains (an empty result, or one made only
    of dots such as "." or ".."), the placeholder "audio" is returned.

    Args:
        filename: Original filename
        max_length: Maximum length for filename, in characters

    Returns:
        Sanitized filename
    """
    import re

    # Strip reserved punctuation and control characters; an embedded NUL
    # would make later os-level calls raise ValueError.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f\x7f]', '', filename)

    # Replace spaces with underscores
    sanitized = sanitized.replace(' ', '_')

    # Limit length
    sanitized = sanitized[:max_length]

    # Ensure the result is a usable name: not empty and not a dot-only
    # directory reference ("." / "..").
    if not sanitized.strip('.'):
        sanitized = "audio"

    return sanitized
|