Update server.py
Browse files
server.py
CHANGED
|
@@ -8,20 +8,17 @@ import logging
|
|
| 8 |
from pathlib import Path
|
| 9 |
from inference import InferenceRecipe
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
import torch._inductor
|
| 14 |
-
import torch._dynamo
|
| 15 |
-
|
| 16 |
-
# Configure Inductor/Triton cache and fallback behavior
|
| 17 |
-
os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
|
| 18 |
-
os.environ["TORCH_INDUCTOR_CACHE_DIR"] = "/tmp/torch_cache"
|
| 19 |
-
torch._inductor.config.suppress_errors = True
|
| 20 |
torch._dynamo.config.suppress_errors = True
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
os.
|
| 24 |
-
os.
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
logging.basicConfig(level=logging.INFO)
|
| 27 |
logger = logging.getLogger(__name__)
|
|
@@ -61,14 +58,12 @@ def initialize_model():
|
|
| 61 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 62 |
logger.info(f"Initializing model on device: {device}")
|
| 63 |
|
| 64 |
-
# Critical: Use absolute path for model loading
|
| 65 |
model_path = os.path.abspath(os.path.join('/app/src', 'models'))
|
| 66 |
logger.info(f"Loading models from: {model_path}")
|
| 67 |
|
| 68 |
if not os.path.exists(model_path):
|
| 69 |
raise RuntimeError(f"Model path {model_path} does not exist")
|
| 70 |
|
| 71 |
-
# Log available model files for debugging
|
| 72 |
model_files = os.listdir(model_path)
|
| 73 |
logger.info(f"Available model files: {model_files}")
|
| 74 |
|
|
@@ -115,23 +110,18 @@ async def inference(request: AudioRequest) -> AudioResponse:
|
|
| 115 |
)
|
| 116 |
|
| 117 |
try:
|
| 118 |
-
# Log input validation
|
| 119 |
logger.info(f"Received inference request with sample rate: {request.sample_rate}")
|
| 120 |
|
| 121 |
-
# Decode audio
|
| 122 |
audio_bytes = base64.b64decode(request.audio_data)
|
| 123 |
audio_array = np.load(io.BytesIO(audio_bytes))
|
| 124 |
logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
|
| 125 |
|
| 126 |
-
# Validate input format
|
| 127 |
if len(audio_array.shape) != 2:
|
| 128 |
raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
|
| 129 |
|
| 130 |
-
# Run inference
|
| 131 |
result = model.inference(audio_array, request.sample_rate)
|
| 132 |
logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
|
| 133 |
|
| 134 |
-
# Encode output
|
| 135 |
buffer = io.BytesIO()
|
| 136 |
np.save(buffer, result['audio'])
|
| 137 |
audio_b64 = base64.b64encode(buffer.getvalue()).decode()
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from inference import InferenceRecipe
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from pydantic import BaseModel
|
| 12 |
|
| 13 |
+
# Configure PyTorch behavior - only use supported configs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
torch._dynamo.config.suppress_errors = True
|
| 15 |
|
| 16 |
+
# Enable verbose Dynamo/compile debug output, and switch off CUDA graphs and torch.compile, via environment variables
|
| 17 |
+
os.environ["TORCH_LOGS"] = "+dynamo"
|
| 18 |
+
os.environ["TORCHDYNAMO_VERBOSE"] = "1"
|
| 19 |
+
os.environ["TORCH_COMPILE_DEBUG"] = "1"
|
| 20 |
+
os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
|
| 21 |
+
os.environ["TORCH_COMPILE"] = "0" # Disable torch.compile
|
| 22 |
|
| 23 |
logging.basicConfig(level=logging.INFO)
|
| 24 |
logger = logging.getLogger(__name__)
|
|
|
|
| 58 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 59 |
logger.info(f"Initializing model on device: {device}")
|
| 60 |
|
|
|
|
| 61 |
model_path = os.path.abspath(os.path.join('/app/src', 'models'))
|
| 62 |
logger.info(f"Loading models from: {model_path}")
|
| 63 |
|
| 64 |
if not os.path.exists(model_path):
|
| 65 |
raise RuntimeError(f"Model path {model_path} does not exist")
|
| 66 |
|
|
|
|
| 67 |
model_files = os.listdir(model_path)
|
| 68 |
logger.info(f"Available model files: {model_files}")
|
| 69 |
|
|
|
|
| 110 |
)
|
| 111 |
|
| 112 |
try:
|
|
|
|
| 113 |
logger.info(f"Received inference request with sample rate: {request.sample_rate}")
|
| 114 |
|
|
|
|
| 115 |
audio_bytes = base64.b64decode(request.audio_data)
|
| 116 |
audio_array = np.load(io.BytesIO(audio_bytes))
|
| 117 |
logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
|
| 118 |
|
|
|
|
| 119 |
if len(audio_array.shape) != 2:
|
| 120 |
raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
|
| 121 |
|
|
|
|
| 122 |
result = model.inference(audio_array, request.sample_rate)
|
| 123 |
logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
|
| 124 |
|
|
|
|
| 125 |
buffer = io.BytesIO()
|
| 126 |
np.save(buffer, result['audio'])
|
| 127 |
audio_b64 = base64.b64encode(buffer.getvalue()).decode()
|