musicgen-medium

@@ -2,11 +2,18 @@ from typing import Dict, List, Any
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import torch
 class EndpointHandler:
     def __init__(self, path=""):
         # load model and processor from path
         self.processor = AutoProcessor.from_pretrained(path)
-        self.model = MusicgenForConditionalGeneration.from_pretrained(path, torch_dtype=torch.float16).to("cuda")
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
         """
@@ -17,22 +24,31 @@ class EndpointHandler:
         # process input
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
         # preprocess
         inputs = self.processor(
             text=[inputs],
             padding=True,
-            return_tensors="pt",).to("cuda")
         # pass inputs with all kwargs in data
         if parameters is not None:
-            with torch.autocast("cuda"):
-                outputs = self.model.generate(**inputs, **parameters, do_sample=True, guidance_scale=3)
         else:
-            with torch.autocast("cuda"):
-                outputs = self.model.generate(**inputs, do_sample=True, guidance_scale=3, max_new_tokens=450)
         # postprocess the prediction
-        prediction = outputs[0].cpu().numpy().tolist()
-        return [{"generated_audio": prediction}]

 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import torch
 class EndpointHandler:
+    """Text-prompt handler that loads a MusicGen checkpoint once and serves generate() calls."""
+    # Generated tokens per second of requested audio, used to turn "duration" into max_new_tokens.
+    # NOTE(review): 50 is the codec frame rate documented for MusicGen; confirm against
+    # model.config.audio_encoder.frame_rate for the checkpoint being served.
+    TOKENS_PER_SECOND = 50
+    # Token budget used when the request gives neither "duration" nor "max_new_tokens".
+    DEFAULT_MAX_NEW_TOKENS = 256
     def __init__(self, path=""):
+        """Load the processor and model from ``path`` and move the model to the chosen device."""
         # load model and processor from path
         self.processor = AutoProcessor.from_pretrained(path)
+        # Use the GPU when one is available, otherwise fall back to the CPU
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = MusicgenForConditionalGeneration.from_pretrained(path)
+        self.model.to(self.device)
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
+        Generate one sequence from the text prompt in ``data``.
+
+        Args:
+            data: request payload. ``data["inputs"]`` is the prompt (the whole
+                payload is used when the key is absent). ``data["parameters"]``
+                is an optional dict of keyword arguments for ``model.generate``;
+                its optional ``"duration"`` entry (seconds) is converted into
+                ``max_new_tokens`` and is not forwarded to ``generate``.
+
+        Returns:
+            A one-element list ``[{"generated_text": array}]`` where ``array`` is
+            ``outputs[0]`` moved to the CPU and converted to a NumPy array.
+            NOTE(review): the key is "generated_text" although the model is a
+            music model; kept unchanged so existing callers keep working.
+
+        Raises:
+            ValueError: if ``duration`` is not positive or cannot be parsed as a number.
+            TypeError: if ``duration`` is of a type ``float()`` cannot convert.
+        """
         # process input
         inputs = data.pop("inputs", data)
+        # Copy so the caller's dict is not mutated; a missing or null
+        # "parameters" entry becomes an empty dict instead of crashing on None.
+        parameters = dict(data.pop("parameters", None) or {})
+        duration = parameters.pop("duration", None)
+        if duration is not None:
+            # float() first: multiplying a JSON string such as "10" would repeat it
+            seconds = float(duration)
+            if seconds <= 0:
+                raise ValueError(f"duration must be positive, got {duration!r}")
+            max_new_tokens = max(1, int(seconds * self.TOKENS_PER_SECOND))
+        else:
+            max_new_tokens = self.DEFAULT_MAX_NEW_TOKENS
+        # An explicit max_new_tokens in the request takes precedence; passing the
+        # keyword both explicitly and via **parameters would raise TypeError.
+        parameters.setdefault("max_new_tokens", max_new_tokens)
         # preprocess
         inputs = self.processor(
             text=[inputs],
             padding=True,
+            return_tensors="pt",).to(self.device)
         # pass inputs with all kwargs in data
+        outputs = self.model.generate(**inputs, **parameters)
         # postprocess the prediction
+        prediction = outputs[0].cpu().numpy()
+        return [{"generated_text": prediction}]