Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| from io import BytesIO | |
| import librosa | |
| import numpy as np | |
| from os import getenv | |
| from PIL.Image import Image, open as open_image | |
| import soundfile as sf | |
| import requests | |
| from tempfile import NamedTemporaryFile | |
| import torch | |
| from transformers import AutoProcessor | |
# Prefer the real Hugging Face Spaces GPU decorator; when the `spaces` package is not
# installed (e.g. local development) fall back to a decorator that does nothing.
try:
    from spaces import GPU as spaces_gpu
except ImportError:
    def spaces_gpu(func=None, **_options):
        """No-op stand-in for ``spaces.GPU`` used when ``spaces`` is unavailable.

        Usable both as a bare decorator (``@spaces_gpu``) and in the
        parameterized form (``@spaces_gpu(duration=60)``). In either case the
        decorated function is returned unchanged and the options are ignored.
        """
        if func is None:
            # Called with options only: hand back a pass-through decorator.
            return lambda wrapped: wrapped
        return func
def get_pytorch_device() -> str:
    """Return the name of the best available PyTorch device.

    Preference order is "cuda", then "xpu", then "mps", falling back to "cpu".
    Backends that the installed PyTorch build does not expose are treated as
    unavailable instead of raising ``AttributeError``.
    """

    def _is_available(namespace: object, backend: str) -> bool:
        # Look the backend up defensively: not every PyTorch build ships it.
        probe = getattr(getattr(namespace, backend, None), "is_available", None)
        return callable(probe) and bool(probe())

    if _is_available(torch, "cuda"):  # Nvidia CUDA and AMD ROCm
        return "cuda"
    if _is_available(torch, "xpu"):  # Intel XPU
        return "xpu"
    # Apple Silicon: check `torch.backends.mps` as well, because
    # `torch.mps.is_available` is not present on every PyTorch release.
    if _is_available(getattr(torch, "backends", None), "mps") or _is_available(torch, "mps"):
        return "mps"
    return "cpu"  # No accelerator found.
def request_image(url: str) -> Image:
    """Download the image at *url* and open it with Pillow.

    The request timeout (in seconds) is read from the ``REQUEST_TIMEOUT``
    environment variable; when it is unset or empty a 10 second fallback is used.

    Args:
        url: Address of the image to fetch.

    Returns:
        The image opened from the response body.

    Raises:
        gr.Error: On an HTTP error status, a timeout, or any other request failure.
    """
    # `int(None)` raises TypeError when the variable is missing, so fall back explicitly.
    timeout_seconds = int(getenv("REQUEST_TIMEOUT") or 10)
    try:
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()
        return open_image(BytesIO(response.content))
    except requests.HTTPError as e:
        raise gr.Error(f"Failed to fetch image from URL because of HTTP error: {e.response.status_code} {e.response.text}") from e
    except requests.Timeout as e:
        raise gr.Error("Failed to fetch image from URL because the request timed out.") from e
    except requests.RequestException as e:
        raise gr.Error(f"Failed to fetch image from URL: {str(e)}") from e
def save_image_to_temp_file(image: Image) -> str:
    """Save *image* to a new temporary file and return the file's path.

    The image's own format is used (PNG when the image reports none) and the
    file suffix is the lower-cased format name. The file is created with
    ``delete=False``, so the caller is responsible for removing it. If saving
    fails, the temporary file is removed before the exception propagates.
    """
    import os  # Local import keeps this block self-contained.

    image_format = image.format or 'PNG'
    # Reserve a unique path, then close the handle before writing so that
    # `image.save` can open the path itself.
    with NamedTemporaryFile(delete=False, suffix=f".{image_format.lower()}") as temp_file:
        temp_path = temp_file.name
    try:
        image.save(temp_path, format=image_format)
    except Exception:
        # Do not leak the temp file when the image cannot be written.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise
    return temp_path
def get_model_sample_rate(model_id: str) -> int:
    """Return the sampling rate of the feature extractor for *model_id*.

    The processor is loaded with ``AutoProcessor.from_pretrained``. If loading
    it or reading the rate fails for any reason, 16000 is returned instead.
    """
    fallback_rate = 16000  # Fallback value as most ASR models use 16kHz
    try:
        extractor = AutoProcessor.from_pretrained(model_id).feature_extractor
        rate = extractor.sampling_rate
    except Exception:
        # Deliberate best-effort: any failure falls back to the default rate.
        rate = fallback_rate
    return rate
| def resample_audio(target_sample_rate: int, audio: tuple[int, bytes | np.ndarray]) -> np.ndarray: | |
| sample_rate, audio_data = audio | |
| # Convert audio data to a numpy array if it’s bytes | |
| if isinstance(audio_data, bytes): | |
| audio_array = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0 | |
| elif isinstance(audio_data, np.ndarray): | |
| audio_array = audio_data.astype(np.float32) | |
| else: | |
| raise ValueError(f"Unsupported audio_data type: {type(audio_data)}") | |
| # Resample if sample rates don’t match. | |
| if sample_rate != target_sample_rate: | |
| audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_sample_rate) | |
| return audio_array | |
def save_audio_to_temp_file(target_sample_rate: int, audio: tuple[int, bytes | np.ndarray]) -> str:
    """Resample *audio* and write it to a new temporary WAV file.

    Args:
        target_sample_rate: Sample rate in Hz of the written file.
        audio: ``(sample_rate, data)`` pair as accepted by ``resample_audio``.

    Returns:
        Path of the written ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it. If
        writing fails, the temporary file is removed before the exception
        propagates.
    """
    import os  # Local import keeps this block self-contained.

    # Resample first so that no temp file is created when the input is invalid.
    audio_array = resample_audio(target_sample_rate, audio)
    # Reserve a unique path, then close the handle before writing so that
    # `sf.write` can open the path itself.
    with NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
        temp_path = temp_file.name
    try:
        sf.write(temp_path, audio_array, target_sample_rate, format='WAV')
    except Exception:
        # Do not leak the temp file when the audio cannot be written.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise
    return temp_path