Automatic Speech Recognition
Transformers
Safetensors
English
asr_model
feature-extraction
asr
speech-recognition
audio
qwen
glm-asr
custom_code
Instructions to use mazesmazes/tiny-audio-next-multiasr with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mazesmazes/tiny-audio-next-multiasr with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="mazesmazes/tiny-audio-next-multiasr", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("mazesmazes/tiny-audio-next-multiasr", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 2,315 Bytes
f9b8048
"""Custom inference handler for HuggingFace Inference Endpoints."""
from typing import Any, Dict, List, Union
try:
# For remote execution, imports are relative
from .asr_modeling import ASRModel
from .asr_pipeline import ASRPipeline
except ImportError:
# For local execution, imports are not relative
from asr_modeling import ASRModel # type: ignore[no-redef]
from asr_pipeline import ASRPipeline # type: ignore[no-redef]
class EndpointHandler:
    """HuggingFace Inference Endpoints handler for the ASR model.

    Loads the model once at construction time and serves transcription
    requests by delegating to an ``ASRPipeline`` built around it.
    """

    def __init__(self, path: str = ""):
        """Load the model and build the inference pipeline.

        Args:
            path: Path to model directory or HuggingFace model ID.
        """
        import os

        import nltk
        from transformers.utils import is_flash_attn_2_available

        # Fetch the NLTK "punkt_tab" resource up front.
        # NOTE(review): presumably needed by the pipeline's text
        # post-processing -- confirm against asr_pipeline.
        nltk.download("punkt_tab", quiet=True)

        # setdefault: an allocator configuration already present in the
        # environment is left untouched.
        os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

        model_kwargs = {
            "device_map": "auto",
            "torch_dtype": "auto",
            "low_cpu_mem_usage": True,
        }
        # Only request FlashAttention-2 when transformers reports it usable.
        if is_flash_attn_2_available():
            model_kwargs["attn_implementation"] = "flash_attention_2"

        self.model = ASRModel.from_pretrained(path, **model_kwargs)

        # Device of the first parameter is used as the pipeline device.
        # NOTE(review): with device_map="auto" the model may be spread over
        # several devices; this picks only the first parameter's -- confirm.
        self.device = next(self.model.parameters()).device

        self.pipe = ASRPipeline(
            model=self.model,
            feature_extractor=self.model.feature_extractor,
            tokenizer=self.model.tokenizer,
            device=self.device,
        )

    def __call__(self, data: Dict[str, Any]) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """Process an inference request.

        Args:
            data: Request data containing 'inputs' (audio path/bytes) and
                optional 'parameters' forwarded to the pipeline as keyword
                arguments.

        Returns:
            Whatever the pipeline returns for the given inputs
            (transcription result with 'text' key).

        Raises:
            ValueError: If 'inputs' is absent from the request or is None.
        """
        inputs = data.get("inputs")
        if inputs is None:
            raise ValueError("Missing 'inputs' in request data")

        # Pass through any parameters from the request and let the model
        # config provide defaults. `or {}` also covers an explicit
        # `"parameters": null`, which would otherwise make `**params` fail
        # with a TypeError.
        params = data.get("parameters") or {}
        return self.pipe(inputs, **params)
|