File size: 4,001 Bytes
991ca47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
Utility functions for the Quran Transcription API
"""

import os
import tempfile
import shutil
import logging
from pathlib import Path
from typing import Optional
from fastapi import UploadFile

logger = logging.getLogger(__name__)


def validate_audio_file(
    filename: Optional[str],
    allowed_formats: list[str]
) -> bool:
    """
    Validate audio file format by its extension.

    The comparison ignores letter case and an optional leading dot on both
    sides, so ``"clip.MP3"`` matches an allow-list entry written as
    ``"mp3"``, ``"MP3"`` or ``".mp3"``.

    Args:
        filename: Name of the file to validate (may be None or empty)
        allowed_formats: List of allowed file extensions

    Returns:
        True if file format is valid, False otherwise. A missing filename
        or a filename without an extension is never valid.
    """
    if not filename:
        return False

    # Only the final suffix counts: "a.tar.gz" -> "gz"
    ext = Path(filename).suffix.lstrip('.').lower()
    if not ext:
        # No extension: do not let an empty/"." allow-list entry match it
        return False

    # Normalise the allow-list the same way the extension was normalised
    allowed = {fmt.strip().lstrip('.').lower() for fmt in allowed_formats}
    return ext in allowed


def get_file_size_mb(file_path: str) -> float:
    """Return the size of the file at ``file_path`` in megabytes (bytes / 1024 / 1024)."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.stat(file_path).st_size
    return size_in_bytes / bytes_per_mb


async def save_upload_file(
    upload_file: UploadFile,
    suffix: Optional[str] = None
) -> str:
    """
    Save uploaded file to temporary location.

    The temporary file is created with ``delete=False``, so it is NOT removed
    automatically; the caller owns the returned path and must delete it when
    done. If the copy fails, the partially written file is removed here.

    Args:
        upload_file: FastAPI UploadFile object; its ``file`` stream is copied
            from the stream's current position
        suffix: File suffix/extension (e.g., '.mp3'). When omitted, the
            extension of ``upload_file.filename`` is used, falling back to
            '.wav' if there is no filename or no extension.

    Returns:
        Path to temporary file

    Raises:
        IOError: If file save fails (the underlying error is chained as
            ``__cause__``)
    """
    if not suffix:
        suffix = Path(upload_file.filename or "").suffix or ".wav"

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    temp_path = temp_file.name
    try:
        # The context manager closes (and flushes) the handle before we either
        # return the path or try to delete it; deleting an open file fails on
        # Windows and would mask the original error.
        with temp_file:
            # NOTE(review): copyfileobj is a synchronous call inside a
            # coroutine; large uploads will block the event loop while copying.
            shutil.copyfileobj(upload_file.file, temp_file)
    except Exception as e:
        logger.error("Error saving upload file: %s", e)
        # Clean up the partial file; a failed cleanup must not hide the cause
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            logger.warning(
                "Failed to clean up temp file %s: %s", temp_path, cleanup_error
            )
        raise IOError(f"Failed to save upload file: {str(e)}") from e

    return temp_path


def cleanup_temp_file(file_path: str) -> None:
    """
    Remove temporary file (best effort, never raises).

    An empty/None path or a file that no longer exists is silently ignored.
    Any other failure is logged as a warning instead of being raised, so this
    is safe to call from ``finally`` blocks.

    Args:
        file_path: Path to temporary file
    """
    if not file_path:
        return

    try:
        # Attempt the removal directly rather than checking existence first:
        # the file may disappear between a check and the remove call.
        os.remove(file_path)
    except FileNotFoundError:
        # Already gone - nothing to clean up
        return
    except Exception as e:
        logger.warning("Failed to clean up temp file %s: %s", file_path, e)
    else:
        logger.debug("Cleaned up temp file: %s", file_path)


def format_duration(seconds: float) -> str:
    """
    Format duration in seconds to human-readable format.

    The value is rounded to the nearest millisecond first and then split with
    integer arithmetic, so float representation error cannot drop a
    millisecond (e.g. 1.001 -> "1s 1ms"). Negative durations are formatted
    from their absolute value with a leading "-".

    Args:
        seconds: Duration in seconds (may be fractional or negative)

    Returns:
        Formatted duration string, using the largest applicable unit:
        "1h 30m 45s", "2m 5s", "1s 500ms" or "250ms". Milliseconds are only
        shown for durations shorter than one minute.

    Raises:
        ValueError: If seconds is NaN
        OverflowError: If seconds is infinite
    """
    total_ms = round(seconds * 1000)
    sign = "-" if total_ms < 0 else ""
    total_ms = abs(total_ms)

    total_secs, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)

    if hours > 0:
        return f"{sign}{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{sign}{minutes}m {secs}s"
    if secs > 0:
        return f"{sign}{secs}s {millis}ms"
    return f"{sign}{millis}ms"


def get_model_info() -> dict:
    """Return a freshly built dict of static metadata describing the model."""
    repo_id = "OdyAsh/faster-whisper-base-ar-quran"
    precisions = ["float32", "float16", "int8"]

    info = dict(
        name=repo_id,
        base_model="tarteel-ai/whisper-base-ar-quran",
        origin="OpenAI Whisper (base)",
        language="Arabic (ar)",
        optimized_for="Quranic recitations",
        framework="CTranslate2",
        quantization_options=precisions,
        # Hugging Face page for the same repository id as "name"
        repository="https://huggingface.co/" + repo_id,
    )
    return info


def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize filename by removing invalid characters.

    Steps, in order: remove the characters ``< > : " / \\ | ? *`` and ASCII
    control characters (including NUL), replace spaces with underscores,
    truncate to ``max_length`` characters, and fall back to ``"audio"`` when
    nothing usable is left.

    Args:
        filename: Original filename
        max_length: Maximum length for filename (plain truncation; a long
            name may lose its extension)

    Returns:
        Sanitized filename. Never empty and never made up solely of dots
        (so "." and ".." are replaced by "audio").
    """
    import re

    # Remove special characters and control characters (0x00-0x1F, 0x7F)
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f\x7f]', '', filename)
    # Replace spaces with underscores
    sanitized = sanitized.replace(' ', '_')
    # Limit length
    sanitized = sanitized[:max_length]
    # Ensure not empty; dot-only names ("." / "..") refer to directories,
    # so they are treated as empty as well
    if not sanitized.strip('.'):
        sanitized = "audio"

    return sanitized