mispeech
/

dashengtokenizer

dashengtokenizer

feature-extraction

audio-classification

signal-processing

Model card · Files and versions

richermans committed on Mar 3

Commit

9e7a843

·

verified ·

1 Parent(s): f945087

Update README.md

Files changed (1) hide show

README.md +2 -2

README.md CHANGED Viewed

@@ -65,13 +65,13 @@ audio, sr = torchaudio.load("path/to/audio.wav")
 # attention_mask[0, 8000:] = 0  # Example: mask second half of first sample
 # Method 1: End-to-end processing (encode + decode)
-with torch.no_grad(), torch.autocast(device_type='cuda')::
     outputs = model(audio)  # Optionally pass attention_mask=attention_mask
     reconstructed_audio = outputs["audio"]
     embeddings = outputs['embeddings']
 # Method 2: Separate encoding and decoding
-with torch.no_grad(), torch.autocast(device_type='cuda')::
     # Encode audio to embeddings
     embeddings = model.encode(audio)  # Optionally pass attention_mask=attention_mask

 # attention_mask[0, 8000:] = 0  # Example: mask second half of first sample
 # Method 1: End-to-end processing (encode + decode)
+with torch.no_grad(), torch.autocast(device_type='cuda'):
     outputs = model(audio)  # Optionally pass attention_mask=attention_mask
     reconstructed_audio = outputs["audio"]
     embeddings = outputs['embeddings']
 # Method 2: Separate encoding and decoding
+with torch.no_grad(), torch.autocast(device_type='cuda'):
     # Encode audio to embeddings
     embeddings = model.encode(audio)  # Optionally pass attention_mask=attention_mask