yangwang825 committed on
Commit
dbde56d
·
1 Parent(s): cd6fd2a

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -0
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MERT
2
+
3
+ ## Usage
4
+
5
+ ```python
6
+ import numpy as np
7
+ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
8
+
9
+ model_id = 'yangwang825/mert-base'
10
+ batch_size = 4
11
+ num_classes = 10
12
+ max_duration = 1.0
13
+
14
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
15
+ model_id,
16
+ trust_remote_code=True
17
+ )
18
+ mert = AutoModelForAudioClassification.from_pretrained(
19
+ model_id,
20
+ num_labels=num_classes,
21
+ ignore_mismatched_sizes=True,
22
+ trust_remote_code=True
23
+ )
24
+
25
+ # Simulate a batch of waveforms (random one-second clips of 16,000 samples each)
26
+ audio_arrays = [np.random.rand(16000, ) for _ in range(batch_size)]
27
+ inputs = feature_extractor(
28
+ audio_arrays, # List of waveforms in numpy array format
29
+ sampling_rate=feature_extractor.sampling_rate,
30
+ max_length=int(feature_extractor.sampling_rate * max_duration),
31
+ padding=True,
32
+ truncation=True,
33
+ return_tensors='pt'
34
+ )
35
+ logits = mert(**inputs) # Logits have shape (batch_size, num_classes); NOTE: if this returns a model-output object, read its `.logits` field
36
+ ```