Spaces:
Running
Running
File size: 8,959 Bytes
a646649 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | """Enhanced error handling and recovery mechanisms."""
import logging
import traceback
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Any
from contextlib import contextmanager
logger = logging.getLogger(__name__)
class ErrorSeverity(Enum):
"""Error severity levels for classification."""
LOW = "low" # Warnings, non-critical issues
MEDIUM = "medium" # Recoverable errors
HIGH = "high" # Critical errors requiring user intervention
FATAL = "fatal" # Unrecoverable errors
class CaptionToolError(Exception):
"""Base exception class for caption tool errors."""
def __init__(self, message: str, severity: ErrorSeverity = ErrorSeverity.MEDIUM,
suggestions: Optional[List[str]] = None):
super().__init__(message)
self.severity = severity
self.suggestions = suggestions or []
def get_user_message(self) -> str:
"""Get user-friendly error message with suggestions."""
msg = f"❌ {self.severity.value.upper()}: {str(self)}"
if self.suggestions:
msg += "\n\n💡 Suggestions:"
for i, suggestion in enumerate(self.suggestions, 1):
msg += f"\n {i}. {suggestion}"
return msg
class AudioValidationError(CaptionToolError):
"""Errors related to audio file validation."""
pass
class ScriptValidationError(CaptionToolError):
"""Errors related to script file validation."""
pass
class AlignmentError(CaptionToolError):
"""Errors during the alignment process."""
pass
class ModelError(CaptionToolError):
"""Errors related to model loading/downloading."""
pass
class ErrorRecovery:
"""Error recovery and retry mechanisms."""
@staticmethod
@contextmanager
def retry_on_failure(max_retries: int = 3, delay: float = 1.0,
exceptions: tuple = (Exception,)):
"""Retry operation with exponential backoff."""
import time
for attempt in range(max_retries + 1):
try:
yield attempt
break
except exceptions as e:
if attempt == max_retries:
raise
wait_time = delay * (2 ** attempt)
logger.warning(f"Attempt {attempt + 1} failed: {e}. "
f"Retrying in {wait_time}s...")
time.sleep(wait_time)
@staticmethod
def diagnose_alignment_failure(audio_path: Path, script_path: Path) -> List[str]:
"""Diagnose common alignment failure causes."""
suggestions = []
# Check file sizes
audio_size = audio_path.stat().st_size
script_size = script_path.stat().st_size
if audio_size < 1024: # Very small audio file
suggestions.append("Audio file seems too small - ensure it contains speech")
if script_size < 10: # Very small script
suggestions.append("Script file seems too short - ensure it contains text")
# Check script content
try:
with open(script_path, 'r', encoding='utf-8') as f:
content = f.read().strip()
if len(content.split()) < 5:
suggestions.append("Script contains very few words - alignment may be unreliable")
if not any('\u0600' <= c <= '\u06FF' for c in content):
suggestions.append("Script contains no Arabic text - ensure language setting is correct")
except Exception:
suggestions.append("Cannot read script file - check encoding (should be UTF-8)")
# Audio duration check
try:
import subprocess
cmd = ['ffprobe', '-v', 'quiet', '-show_entries',
'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1',
str(audio_path)]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
duration = float(result.stdout.strip())
if duration < 1.0:
suggestions.append("Audio is very short - ensure it contains sufficient speech")
elif duration > 300: # 5 minutes
suggestions.append("Audio is very long - consider splitting into smaller segments")
except Exception:
suggestions.append("Cannot determine audio duration - ensure file is valid")
return suggestions
@staticmethod
def suggest_recovery_actions(error: Exception, context: Dict[str, Any]) -> List[str]:
"""Suggest recovery actions based on error type and context."""
suggestions = []
error_str = str(error).lower()
if "memory" in error_str or "out of memory" in error_str:
suggestions.extend([
"Free up system memory by closing other applications",
"Try processing smaller audio segments",
"Use sentence-level alignment instead of word-level",
"Restart the script to clear memory"
])
elif "network" in error_str or "connection" in error_str or "download" in error_str:
suggestions.extend([
"Check your internet connection",
"Try again in a few minutes (server may be busy)",
"Use a VPN if in a restricted network",
"Clear the model cache directory and retry"
])
elif "permission" in error_str or "access" in error_str:
suggestions.extend([
"Check file permissions for input/output directories",
"Run as administrator if necessary",
"Ensure output directory is writable"
])
elif "format" in error_str or "codec" in error_str:
suggestions.extend([
"Convert audio to a supported format (MP3, WAV, M4A)",
"Ensure audio has speech content (not just music/silence)",
"Check if audio file is corrupted"
])
elif "alignment failed" in error_str:
audio_path = context.get('audio_path')
script_path = context.get('script_path')
if audio_path and script_path:
suggestions.extend(
ErrorRecovery.diagnose_alignment_failure(audio_path, script_path)
)
return suggestions
class ErrorLogger:
"""Enhanced error logging with context."""
def __init__(self, log_file: Optional[Path] = None):
self.log_file = log_file or Path("caption_tool_errors.log")
def log_error(self, error: Exception, context: Dict[str, Any] = None):
"""Log error with full context and stack trace."""
context = context or {}
error_info = {
"error_type": type(error).__name__,
"error_message": str(error),
"context": context,
"stack_trace": traceback.format_exc()
}
# Log to file
try:
with open(self.log_file, 'a', encoding='utf-8') as f:
import json
import datetime
log_entry = {
"timestamp": datetime.datetime.now().isoformat(),
**error_info
}
f.write(json.dumps(log_entry, ensure_ascii=False, indent=2) + "\n\n")
except Exception as e:
logger.error(f"Failed to write error log: {e}")
# Log to console
logger.error(f"Error: {error_info['error_type']}: {error_info['error_message']}")
if context:
logger.error(f"Context: {context}")
def handle_graceful_shutdown(error: Exception, context: Dict[str, Any] = None) -> str:
"""Handle graceful shutdown with user-friendly error reporting."""
context = context or {}
# Log the error
error_logger = ErrorLogger()
error_logger.log_error(error, context)
# Determine error type and provide appropriate response
if isinstance(error, CaptionToolError):
return error.get_user_message()
# For other exceptions, create a generic CaptionToolError
suggestions = ErrorRecovery.suggest_recovery_actions(error, context)
if "memory" in str(error).lower():
severity = ErrorSeverity.HIGH
elif "network" in str(error).lower() or "download" in str(error).lower():
severity = ErrorSeverity.MEDIUM
else:
severity = ErrorSeverity.HIGH
wrapped_error = CaptionToolError(
message=str(error),
severity=severity,
suggestions=suggestions
)
return wrapped_error.get_user_message() |