bark

H-H-E committed on Jan 8, 2024

Commit

7a8a532

1 Parent(s): 30769ce

Upload handler.py

Files changed (1) hide show

handler.py CHANGED Viewed

@@ -1,28 +1,28 @@
 from typing import Dict, List, Any
-from transformers import pipeline
-import scipy.io.wavfile
 class EndpointHandler:
     def __init__(self, path=""):
-        self.synthesiser = pipeline("text-generation", "suno/bark")  # Attempt to create pipeline
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        text_prompt = data.get("inputs")
-        if not text_prompt:
-            raise ValueError("Missing required 'inputs' field in request data.")
         try:
-            print(self.synthesiser)
-            speech = self.synthesiser(text_prompt, forward_params={"do_sample": True})
-            print(speech)
-            audio_data = speech["audio"]  # Assuming audio is in a NumPy array
-            sampling_rate = speech["sampling_rate"]
-            # Return audio data as a byte string (adjust format as needed)
             audio_bytes = audio_data.tobytes()
             return {"audio": audio_bytes, "sampling_rate": sampling_rate}
         except Exception as e:
-            # Handle potential errors with model loading or usage
             return {"error": str(e)}

 from typing import Dict, List, Any
+from transformers import AutoProcessor, AutoModel
+import scipy.io.wavfile  # Assuming WAV output format
 class EndpointHandler:
     def __init__(self, path=""):
+        self.processor = AutoProcessor.from_pretrained("suno/bark")
+        self.model = AutoModel.from_pretrained("suno/bark")
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         try:
+            text_prompt = data.get("inputs")
+            if not text_prompt:
+                raise ValueError("Missing required 'inputs' field in request data.")
+            inputs = self.processor(text=[text_prompt], return_tensors="pt")
+            speech_values = self.model.generate(**inputs, do_sample=True)
+            # Convert the first generated output from a tensor to a NumPy array
+            audio_data = speech_values[0].numpy()
+            sampling_rate = 22050  # Adjust as needed based on model documentation
+            # Return audio data as a byte string
             audio_bytes = audio_data.tobytes()
             return {"audio": audio_bytes, "sampling_rate": sampling_rate}
         except Exception as e:
             return {"error": str(e)}