Commit: Update custom model files, README, and requirements

Changed file: handler.py (+6 −1)
@@ -19,6 +19,11 @@ class EndpointHandler:
 # Set environment variables for PyTorch/CUDA (must be before imports/operations)
 import os
 
+# Download NLTK data for truecasing (needed by the pipeline)
+import nltk
+
+nltk.download("punkt_tab", quiet=True)
+
 # Enable expandable segments to reduce fragmentation
 os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
 
@@ -57,7 +62,7 @@ class EndpointHandler:
 
 # Apply torch.compile if enabled (after model is loaded by pipeline)
 # Enable by default for significant speedup (20-40%)
-if torch.cuda.is_available()
+if torch.cuda.is_available():
     compile_mode = os.getenv("TORCH_COMPILE_MODE", "reduce-overhead")
     self.model = torch.compile(self.model, mode=compile_mode)
     # Update the pipeline with the compiled model