Dupaja committed on
Commit
74cf751
·
1 Parent(s): aaf0168
Files changed (1) hide show
  1. handler.py +16 -16
handler.py CHANGED
@@ -1,37 +1,37 @@
 
1
  from transformers import pipeline
2
  import torch
3
  import soundfile as sf
4
- import base64
5
  import io
6
 
7
  class EndpointHandler:
8
- def __init__(self):
9
  self.synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
10
  self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
11
 
12
- def __call__(self, data):
13
  text = data.get("inputs", "")
14
  speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)
15
 
16
  # Generate speech using the synthesiser
17
  speech = self.synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
18
 
19
- # Convert numpy audio array to bytes
20
- audio_bytes = io.BytesIO()
21
- sf.write(audio_bytes, speech["audio"], samplerate=speech["sampling_rate"], format='WAV')
22
- audio_bytes.seek(0)
23
- audio_base64 = base64.b64encode(audio_bytes.read()).decode('utf-8')
24
 
25
- # Create response
 
 
 
 
 
26
  response = {
27
  "statusCode": 200,
28
- "body": {
29
- "audio": audio_base64,
30
- "sampling_rate": speech["sampling_rate"]
31
- },
32
- "headers": {
33
- "Content-Type": "audio/wav"
34
- }
35
  }
36
 
37
  return response
 
1
+ from typing import Dict
2
  from transformers import pipeline
3
  import torch
4
  import soundfile as sf
 
5
  import io
6
 
7
  class EndpointHandler:
8
+ def __init__(self, path=""):
9
  self.synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
10
  self.embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
11
 
12
+ def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
13
  text = data.get("inputs", "")
14
  speaker_embedding = torch.tensor(self.embeddings_dataset[7306]["xvector"]).unsqueeze(0)
15
 
16
  # Generate speech using the synthesiser
17
  speech = self.synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
18
 
19
+ # Convert numpy audio array to a WAV byte stream.
20
+ audio_buffer = io.BytesIO()
21
+ sf.write(file=audio_buffer, data=speech["audio"], samplerate=speech["sampling_rate"], format='WAV')
22
+ audio_buffer.seek(0)
23
+ audio_wav = audio_buffer.read()
24
 
25
+ # Prepare the response headers.
26
+ headers = {
27
+ "Content-Type": "audio/wav"
28
+ }
29
+
30
+ # Create the response as raw audio bytes.
31
  response = {
32
  "statusCode": 200,
33
+ "body": audio_wav,
34
+ "headers": headers
 
 
 
 
 
35
  }
36
 
37
  return response