Update README.md
Browse files
README.md
CHANGED
@@ -55,7 +55,6 @@ from transformers import AutoProcessor, AutoModel
 
 # load model
 model_name = "PantagrueLLM/speech-base-14K"
-# Note: please normalize the audio if not using AutoProcessor
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 model.eval()
@@ -63,6 +62,7 @@ model.eval()
 # load audio files
 wav, curr_sample_rate = sf.read("audio.wav", dtype="float32")
 feats = torch.from_numpy(wav).float()
+# Note: please normalize the audio if not using AutoProcessor
 inputs = processor(feats, sampling_rate=16000, return_tensors="pt")
 
 # extract features