speecht5_tts

@@ -3,7 +3,7 @@ import numpy as np
 import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset
-import soundfile as sf
 from typing import Dict, List, Any
@@ -36,20 +36,18 @@ class EndpointHandler:
         speech = self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder)
-        filename = "current_sample.wav"
         # Write the response audio to a file
-        sf.write(filename, speech.numpy(), samplerate=16000)
-        return send_file(filename, mimetype='audio/wav', as_attachment=True, attachment_filename=filename)
         # Return the expected response format
-        #return {
-        #    "statusCode": 200,
-        #    "body": {
-        #        "audio": speech.numpy(),  # Consider encoding this to a suitable format
-        #        "sampling_rate": 16000
-        #    }
-        #}
 handler = EndpointHandler()

 import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset
+#import soundfile as sf
 from typing import Dict, List, Any
         speech = self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder)
+        #filename = "current_sample.wav"
         # Write the response audio to a file
+        #sf.write(filename, speech.numpy(), samplerate=16000)
         # Return the expected response format
+        return {
+            "statusCode": 200,
+            "body": {
+                "audio": speech.numpy(),  # TODO: a NumPy array is not serializable by the stdlib json encoder; convert it (e.g. .tolist() or base64-encoded WAV bytes) before returning
+                "sampling_rate": 16000
+            }
+        }
 handler = EndpointHandler()