Safetensors
EarthSpeciesProject
NatureLM
File size: 2,695 Bytes
240e8e1
 
 
 
e3ad53e
377fac9
240e8e1
 
 
b8f460c
e3ad53e
b8f460c
e3ad53e
f4b54ec
 
 
 
 
 
 
 
 
 
240e8e1
 
 
8172885
240e8e1
e3ad53e
 
 
 
8172885
 
e3ad53e
 
 
 
 
 
 
 
 
 
240e8e1
 
8172885
240e8e1
 
 
 
 
 
 
 
 
fe72895
 
 
 
 
 
240e8e1
 
 
 
 
 
 
 
5a6ef06
240e8e1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from typing import Dict, List, Any
from NatureLM.models import NatureLM
from NatureLM.infer import Pipeline
import numpy as np
import torch
import os

class EndpointHandler():
    """Hugging Face Inference Endpoints handler for NatureLM-audio.

    Loads the NatureLM model once at startup and answers free-form text
    queries about raw audio supplied as a list of floats (or numpy array).

    Environment
    -----------
    LLAMA_TOK : str, optional
        HF access token forwarded to ``NatureLM.from_pretrained`` (the
        underlying Llama weights are gated).
    """

    def __init__(self, path=""):
        """Load the model and build the inference pipeline.

        Parameters
        ----------
        path : str, optional
            Directory of the deployed repository (passed by HF Inference
            Endpoints). Searched first when locating ``inference.yml``.
        """
        auth_token = os.environ.get("LLAMA_TOK")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = NatureLM.from_pretrained(
            "EarthSpeciesProject/NatureLM-audio",
            device=device,
            hf_auth_token=auth_token,
        )
        self.model = model.eval().to(device)
        self.pipeline = Pipeline(model=self.model, cfg_path=self._find_config(path))

    @staticmethod
    def _find_config(path: str) -> str:
        """Return the path to ``inference.yml``.

        Checks, in order: the deployed repo directory (``path``), the
        current working directory, then the directory containing this
        script. Raises ``FileNotFoundError`` if none exists.
        """
        candidates = []
        if path:
            candidates.append(os.path.join(path, "inference.yml"))
        candidates.append("inference.yml")
        script_dir = os.path.dirname(os.path.abspath(__file__))
        candidates.append(os.path.join(script_dir, "inference.yml"))

        for cfg_path in candidates:
            if os.path.exists(cfg_path):
                return cfg_path
        raise FileNotFoundError(
            f"inference.yml not found at {candidates[-1]}. "
            f"Current directory contents: {os.listdir('.')}"
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process audio list of floats with NatureLM-audio model.

        Parameters
        ----------
        data : Dict[str, Any]
            Dictionary containing:
            - inputs : list[float]
                Audio data as list of floats
            - query : str
                Question to ask about the audio
            - sample_rate : int, optional
                Audio sample rate, default 16000

        Returns
        -------
        List[Dict[str, Any]]
            List containing result dictionary with 'result' and 'query' keys,
            or error dictionary with 'error' key
        """
        audio = data.get("inputs")
        query = data.get("query", "")
        sample_rate = data.get("sample_rate", 16000)

        # Guard clauses: report missing fields as error payloads, never raise.
        if audio is None:
            return [{"error": "No audio data provided"}]
        if not query:
            return [{"error": "No query provided"}]

        # Convert list to numpy array if needed (when sent via JSON).
        if isinstance(audio, list):
            try:
                audio = np.array(audio, dtype=np.float32)
            except (ValueError, TypeError) as e:
                # Ragged or non-numeric payloads must not crash the endpoint.
                return [{"error": f"Could not convert audio to float array: {e}"}]
        elif not isinstance(audio, np.ndarray):
            return [{"error": f"Audio data must be a list or numpy array, got {type(audio)}"}]

        try:
            # Run inference using the pipeline.
            results = self.pipeline(
                audios=[audio],
                queries=query,
                input_sample_rate=sample_rate,
            )
            return [{"result": results, "query": query}]
        except Exception as e:
            # Surface inference failures to the client instead of a 500.
            return [{"error": f"Error processing audio: {str(e)}"}]