import torch, numpy as np, librosa from transformers import ASTForAudioClassification, ASTFeatureExtractor SR=16000; SEG_LEN=SR*14; HOP_LEN=SR*7 def rms_normalize(x): return x*(0.15/(np.sqrt((x**2).mean())+1e-8)) def get_segments(y): segs=[] for s in range(0,len(y)-SEG_LEN+1,HOP_LEN): segs.append(y[s:s+SEG_LEN]) if not segs: segs=[y[:SEG_LEN]] return segs class Model: def __init__(self, path): self.fe = ASTFeatureExtractor.from_pretrained(path) self.model = ASTForAudioClassification.from_pretrained(path) self.model.eval() def __call__(self, fp): y,_=librosa.load(fp,sr=SR) if len(y)