# Spaces: hetchyy / Quran-multi-aligner (Running on Zero)
# File size: 5,354 Bytes

"""
Utilities for integrating Hugging Face Spaces ZeroGPU without breaking
local or non-ZeroGPU environments.
"""

import re
from typing import Callable, TypeVar
from functools import wraps

# Generic "any callable" type so decorators can declare they return what they receive.
T = TypeVar("T", bound=Callable)

# Stays False unless the `spaces` import below succeeds (it fails e.g. on local runs).
ZERO_GPU_AVAILABLE = False

# Module-wide fallback state, mutated by the helper functions in this module.
_gpu_quota_exhausted = False  # True once a quota error forced a CPU fallback
_quota_reset_time = None  # reset countdown parsed from the error, e.g. "13:53:59"
_user_forced_cpu = False  # True after the user explicitly picked CPU mode

try:
    import spaces  # type: ignore

    gpu_decorator = spaces.GPU  # pragma: no cover
    ZERO_GPU_AVAILABLE = True
except Exception:
    def gpu_decorator(*decorator_args, **decorator_kwargs):
        """
        Stand-in for spaces.GPU that changes nothing, used when the package
        cannot be imported (or exposes no GPU attribute).

        Works both as a bare ``@gpu_decorator`` and as a configured
        ``@gpu_decorator(...)``; in either form the decorated function is
        returned as-is and all arguments are ignored.
        """
        # Bare form: the first positional argument is the function being
        # decorated and no keyword options were supplied.
        used_bare = (
            bool(decorator_args)
            and callable(decorator_args[0])
            and not decorator_kwargs
        )
        if used_bare:
            return decorator_args[0]

        # Configured form: hand back a decorator that leaves the function untouched.
        def passthrough(func: T) -> T:
            return func

        return passthrough


def is_quota_exhausted() -> bool:
    """Report whether a GPU quota error has been recorded.

    Returns:
        The module-level ``_gpu_quota_exhausted`` flag. It becomes True when a
        ``gpu_with_fallback`` wrapper detects a quota error and stays True
        until ``reset_quota_flag()`` clears it.
    """
    return _gpu_quota_exhausted


def is_user_forced_cpu() -> bool:
    """Report whether CPU mode was selected manually.

    Returns:
        The module-level ``_user_forced_cpu`` flag. It is set by
        ``force_cpu_mode()`` and cleared by ``reset_quota_flag()``.
    """
    return _user_forced_cpu


def get_quota_reset_time() -> str | None:
    """Return the quota reset countdown captured from the last quota error.

    Returns:
        The time string parsed from the "Try again in ..." part of the quota
        error message (e.g. '13:53:59'), or None when no such text has been
        parsed or the state was cleared by ``reset_quota_flag()``.
    """
    return _quota_reset_time


def reset_quota_flag():
    """Clear all CPU-fallback state so GPU execution is attempted again.

    Beyond the quota-exhausted flag itself (e.g., after the quota resets),
    this also forgets the stored reset time and clears the user-forced CPU
    mode flag.
    """
    global _gpu_quota_exhausted, _quota_reset_time, _user_forced_cpu
    _quota_reset_time = None
    _gpu_quota_exhausted = _user_forced_cpu = False


def force_cpu_mode():
    """Switch to user-selected CPU mode.

    Sets the module-level ``_user_forced_cpu`` flag, which makes every
    ``gpu_with_fallback`` wrapper call the undecorated function directly
    instead of its GPU-wrapped version, then asks ``_move_models_to_cpu()``
    to relocate the models. The flag stays set until ``reset_quota_flag()``
    is called.
    """
    global _user_forced_cpu
    _user_forced_cpu = True
    # Best-effort: a failure to move the models is printed, not raised.
    _move_models_to_cpu()


def _move_models_to_cpu():
    """Best-effort relocation of the models to CPU for fallback inference.

    Lazily imports ``ensure_models_on_cpu`` from ``.segmenter.segmenter_model``
    and calls it. Any ``Exception`` raised by the import or by the call is
    printed with a ``[CPU]`` prefix and swallowed, so callers are never
    interrupted by a failed move.
    """
    try:
        # Imported here rather than at module level, so an import failure is
        # handled by the same except clause as a failed move.
        from .segmenter.segmenter_model import ensure_models_on_cpu as relocate_to_cpu
        relocate_to_cpu()
    except Exception as err:
        print(f"[CPU] Failed to move models to CPU: {err}")


def gpu_with_fallback(duration=60):
    """
    Build a decorator that runs a function on ZeroGPU, falling back to CPU.

    Each call to the decorated function is routed as follows:

    1. User forced CPU mode -> call the undecorated function directly.
    2. Quota already marked exhausted -> move models to CPU, then call the
       undecorated function.
    3. Otherwise call the GPU-wrapped function. If that raises a quota error
       (``title`` equal to "ZeroGPU quota exceeded", or "quota" anywhere in
       the message), the exhausted flag is set, the reset time is parsed from
       the message when present, a Gradio warning toast is attempted, models
       are moved to CPU and the undecorated function is called instead.
       Any other exception is re-raised unchanged; messages that look like a
       timeout are additionally printed.

    The decorated function is expected to call ensure_models_on_gpu()
    internally, which checks is_quota_exhausted() to decide whether to move
    to CUDA.

    Args:
        duration: Forwarded as ``duration=`` to ``gpu_decorator`` when ZeroGPU
            is available; unused otherwise.

    Usage:
        @gpu_with_fallback(duration=60)
        def my_gpu_func(data):
            ensure_models_on_gpu()  # Moves to CUDA if quota not exhausted
            # ... inference using model's current device ...
    """
    def decorator(func: T) -> T:
        # Only route through ZeroGPU when the spaces package was importable;
        # otherwise the "GPU" version is just the function itself.
        gpu_func = gpu_decorator(duration=duration)(func) if ZERO_GPU_AVAILABLE else func

        @wraps(func)
        def wrapper(*args, **kwargs):
            global _gpu_quota_exhausted, _quota_reset_time

            # Explicit user choice wins: never touch the GPU path.
            if _user_forced_cpu:
                print("[CPU] User selected CPU mode")
                return func(*args, **kwargs)

            # A previous call already hit the quota: skip the GPU attempt.
            if _gpu_quota_exhausted:
                print("[GPU] Quota exhausted, using CPU fallback")
                _move_models_to_cpu()
                return func(*args, **kwargs)

            try:
                return gpu_func(*args, **kwargs)
            except Exception as exc:
                # ZeroGPU raises gradio.Error with title="ZeroGPU quota exceeded";
                # the message text is only inspected when the title does not match.
                quota_hit = (
                    getattr(exc, 'title', '') == "ZeroGPU quota exceeded"
                    or 'quota' in str(exc).lower()
                )

                if not quota_hit:
                    # Not a quota problem: log likely timeouts, then propagate.
                    lowered = str(exc).lower()
                    timeout_markers = ('timeout', 'duration', 'time limit')
                    if any(marker in lowered for marker in timeout_markers):
                        print(f"[GPU] Timeout error in {func.__name__}: {exc}")
                    raise

                print(f"[GPU] Quota exceeded, falling back to CPU: {exc}")
                _gpu_quota_exhausted = True

                # Messages look like "Try again in 13:53:59"; keep the countdown.
                found = re.search(r'Try again in (\d+:\d{2}:\d{2})', str(exc))
                if found is not None:
                    _quota_reset_time = found.group(1)

                # Immediate toast notification, skipped silently when not in a
                # Gradio context (e.g., CLI usage).
                try:
                    import gradio as gr
                    if _quota_reset_time:
                        reset_msg = f" Resets in {_quota_reset_time}."
                    else:
                        reset_msg = ""
                    gr.Warning(f"GPU quota reached — switching to CPU (slower).{reset_msg}")
                except Exception:
                    pass

                _move_models_to_cpu()
                return func(*args, **kwargs)

        return wrapper
    return decorator