Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,191 Bytes
d56b9d9 0fea237 d56b9d9 0fea237 d56b9d9 dc382c8 d56b9d9 0fea237 dc382c8 d56b9d9 02c9b64 d56b9d9 dc382c8 02c9b64 0fea237 02c9b64 0fea237 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
from io import BytesIO
import librosa
import numpy as np
from os import getenv
from PIL.Image import Image, open as open_image
import soundfile as sf
import requests
from tempfile import NamedTemporaryFile
import torch
from transformers import AutoProcessor
# Try to import spaces decorator (for Hugging Face Spaces), otherwise use no-op decorator.
try:
from spaces import GPU as spaces_gpu
except ImportError:
# For local development, use a no-op decorator because spaces is not available.
def spaces_gpu(func):
return func
def get_pytorch_device() -> str:
return ("cuda" if torch.cuda.is_available() # Nvidia CUDA and AMD ROCm
else "xpu" if torch.xpu.is_available() # Intel XPU
else "mps" if torch.mps.is_available() # Apple Silicon
else "cpu") # gl bro 🫠
def request_image(url: str) -> Image:
try:
response = requests.get(url, timeout=int(getenv("REQUEST_TIMEOUT")))
response.raise_for_status()
return open_image(BytesIO(response.content))
except requests.HTTPError as e:
raise gr.Error(f"Failed to fetch image from URL because of HTTP error: {e.response.status_code} {e.response.text}")
except requests.Timeout as e:
raise gr.Error(f"Failed to fetch image from URL because the request timed out.")
except requests.RequestException as e:
raise gr.Error(f"Failed to fetch image from URL: {str(e)}")
def save_image_to_temp_file(image: Image) -> str:
image_format = image.format if image.format else 'PNG'
format_extension = image_format.lower() if image_format else 'png'
temp_file = NamedTemporaryFile(delete=False, suffix=f".{format_extension}")
temp_path = temp_file.name
temp_file.close()
image.save(temp_path, format=image_format)
return temp_path
def get_model_sample_rate(model_id: str) -> int:
try:
processor = AutoProcessor.from_pretrained(model_id)
return processor.feature_extractor.sampling_rate
except Exception:
return 16000 # Fallback value as most ASR models use 16kHz
def resample_audio(target_sample_rate: int, audio: tuple[int, bytes | np.ndarray]) -> np.ndarray:
sample_rate, audio_data = audio
# Convert audio data to a numpy array if it’s bytes
if isinstance(audio_data, bytes):
audio_array = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
elif isinstance(audio_data, np.ndarray):
audio_array = audio_data.astype(np.float32)
else:
raise ValueError(f"Unsupported audio_data type: {type(audio_data)}")
# Resample if sample rates don’t match.
if sample_rate != target_sample_rate:
audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_sample_rate)
return audio_array
def save_audio_to_temp_file(target_sample_rate: int, audio: tuple[int, bytes | np.ndarray]) -> str:
audio_array = resample_audio(target_sample_rate, audio)
temp_file = NamedTemporaryFile(delete=False, suffix='.wav')
temp_path = temp_file.name
temp_file.close()
sf.write(temp_path, audio_array, target_sample_rate, format='WAV')
return temp_path
|