Update README.md
README.md CHANGED
````diff
@@ -43,7 +43,7 @@ Download model:
 from huggingface_hub import hf_hub_download
 
 # automatically checks for cached file, optionally set `cache_dir` location
-model_file = hf_hub_download(repo_id='Jenthe/ECAPA2', filename='
+model_file = hf_hub_download(repo_id='Jenthe/ECAPA2', filename='ecapa2.pt', cache_dir=None)
 ```
 
````
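If the default Hugging Face cache location is not convenient, `hf_hub_download` also accepts an explicit `cache_dir`; a minimal sketch, where `./models` is an illustrative path rather than anything from the original README:

```python
from huggingface_hub import hf_hub_download

# './models' is an example location; any writable directory works
model_file = hf_hub_download(repo_id='Jenthe/ECAPA2', filename='ecapa2.pt',
                             cache_dir='./models')
```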
````diff
@@ -55,10 +55,10 @@ Extracting speaker embeddings is easy and only requires a few lines of code:
 import torch
 import torchaudio
 
-
+ecapa2 = torch.jit.load(model_file, map_location='cpu')
 audio, sr = torchaudio.load('sample.wav') # sample rate of 16 kHz expected
 
-embedding =
+embedding = ecapa2(audio)
 ```
 
 For faster, 16-bit half-precision CUDA inference (recommended):
````
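Speaker embeddings like these are usually compared with cosine similarity for verification. A minimal sketch under that assumption, reusing the `ecapa2` module from the example above; `enroll.wav` and `test.wav` are placeholder file names, and the output is assumed to be a `(1, embedding_dim)` tensor:

```python
import torch.nn.functional as F
import torchaudio

# placeholder files; 16 kHz mono audio expected, as above
enroll, _ = torchaudio.load('enroll.wav')
test, _ = torchaudio.load('test.wav')

emb_enroll = ecapa2(enroll)
emb_test = ecapa2(test)

# cosine similarity as the verification score; the accept/reject
# threshold is data-dependent and must be tuned on held-out trials
score = F.cosine_similarity(emb_enroll, emb_test).item()
print(f'cosine similarity: {score:.3f}')
```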
````diff
@@ -67,11 +67,11 @@ For faster, 16-bit half-precision CUDA inference (recommended):
 import torch
 import torchaudio
 
-
-
+ecapa2 = torch.jit.load(model_file, map_location='cuda')
+ecapa2.half() # optional, but results in faster inference
 audio, sr = torchaudio.load('sample.wav') # sample rate of 16 kHz expected
 
-embedding =
+embedding = ecapa2(audio)
 ```
 
 There is no need for `ecapa2.eval()` or `torch.no_grad()`; this is done automatically.
````
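One caveat on the half-precision example: the waveform is loaded on the CPU while the module is mapped to `cuda`. The README does not say whether the TorchScript graph moves inputs itself; if it does not, explicit placement (an assumption, not a documented requirement) would look like:

```python
# assumption: the scripted model expects its input on the same device;
# add .half() to the input as well when ecapa2.half() was called
audio = audio.to('cuda')
embedding = ecapa2(audio)
```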
````diff
@@ -82,13 +82,13 @@ For the extraction of other hierarchical features, the `label` argument can be used:
 
 ```python
 # default, only extract the embedding
-feature =
+feature = ecapa2(audio, label='embedding')
 
 # concatenates the gfe_1, pool and embedding features
-feature =
+feature = ecapa2(audio, label='gfe_1|pool|embedding')
 
 # returns the same output as previous example, concatenation always follows the order of the network
-feature =
+feature = ecapa2(audio, label='embedding|gfe_1|pool')
 ```
 
 The following table describes the available features. All features consist of the mean and variance of the frame-level encodings at the indicated layer, except for the speaker embedding.
````
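The order-invariance stated in the last comment is easy to check directly; a small sketch assuming the calls above:

```python
import torch

# both label orders should produce the identical concatenated tensor,
# since concatenation follows the network's layer order, not the string
f1 = ecapa2(audio, label='gfe_1|pool|embedding')
f2 = ecapa2(audio, label='embedding|gfe_1|pool')
assert torch.equal(f1, f2)
```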
|