mazesmazes committed on
Commit
3b25064
·
verified ·
1 Parent(s): 4733564

Update custom model files, README, and requirements

Browse files
Files changed (1) hide show
  1. handler.py +6 -1
handler.py CHANGED
@@ -19,6 +19,11 @@ class EndpointHandler:
19
  # Set environment variables for PyTorch/CUDA (must be before imports/operations)
20
  import os
21
 
 
 
 
 
 
22
  # Enable expandable segments to reduce fragmentation
23
  os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
24
 
@@ -57,7 +62,7 @@ class EndpointHandler:
57
 
58
  # Apply torch.compile if enabled (after model is loaded by pipeline)
59
  # Enable by default for significant speedup (20-40%)
60
- if torch.cuda.is_available() and os.getenv("ENABLE_TORCH_COMPILE", "1") == "1":
61
  compile_mode = os.getenv("TORCH_COMPILE_MODE", "reduce-overhead")
62
  self.model = torch.compile(self.model, mode=compile_mode)
63
  # Update the pipeline with the compiled model
 
19
  # Set environment variables for PyTorch/CUDA (must be before imports/operations)
20
  import os
21
 
22
+ # Download NLTK data for truecasing (needed by the pipeline)
23
+ import nltk
24
+
25
+ nltk.download("punkt_tab", quiet=True)
26
+
27
  # Enable expandable segments to reduce fragmentation
28
  os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
29
 
 
62
 
63
  # Apply torch.compile if enabled (after model is loaded by pipeline)
64
  # Enable by default for significant speedup (20-40%)
65
+ if torch.cuda.is_available():
66
  compile_mode = os.getenv("TORCH_COMPILE_MODE", "reduce-overhead")
67
  self.model = torch.compile(self.model, mode=compile_mode)
68
  # Update the pipeline with the compiled model