tezuesh
/

moshi_general

@@ -8,20 +8,17 @@ import logging
 from pathlib import Path
 from inference import InferenceRecipe
 from fastapi.middleware.cors import CORSMiddleware
-# Add these imports and configurations at the top
-import torch._inductor
-import torch._dynamo
-# Configure Inductor/Triton cache and fallback behavior
-os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
-os.environ["TORCH_INDUCTOR_CACHE_DIR"] = "/tmp/torch_cache"
-torch._inductor.config.suppress_errors = True
 torch._dynamo.config.suppress_errors = True
-# Create cache directories with correct permissions
-os.makedirs("/tmp/triton_cache", exist_ok=True)
-os.makedirs("/tmp/torch_cache", exist_ok=True)
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -61,14 +58,12 @@ def initialize_model():
         device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Initializing model on device: {device}")
-        # Critical: Use absolute path for model loading
         model_path = os.path.abspath(os.path.join('/app/src', 'models'))
         logger.info(f"Loading models from: {model_path}")
         if not os.path.exists(model_path):
             raise RuntimeError(f"Model path {model_path} does not exist")
-        # Log available model files for debugging
         model_files = os.listdir(model_path)
         logger.info(f"Available model files: {model_files}")
@@ -115,23 +110,18 @@ async def inference(request: AudioRequest) -> AudioResponse:
         )
     try:
-        # Log input validation
         logger.info(f"Received inference request with sample rate: {request.sample_rate}")
-        # Decode audio
         audio_bytes = base64.b64decode(request.audio_data)
         audio_array = np.load(io.BytesIO(audio_bytes))
         logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
-        # Validate input format
         if len(audio_array.shape) != 2:
             raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
-        # Run inference
         result = model.inference(audio_array, request.sample_rate)
         logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
-        # Encode output
         buffer = io.BytesIO()
         np.save(buffer, result['audio'])
         audio_b64 = base64.b64encode(buffer.getvalue()).decode()

 from pathlib import Path
 from inference import InferenceRecipe
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+# Configure PyTorch behavior - only use supported configs
 torch._dynamo.config.suppress_errors = True
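+# Enable verbose Dynamo/compile debug logging, then try to turn off CUDA graphs and torch.compile via environment variables
+# NOTE(review): confirm PyTorch actually reads TORCHINDUCTOR_DISABLE_CUDAGRAPHS and TORCH_COMPILE, and that setting them after torch is loaded still takes effect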
+os.environ["TORCH_LOGS"] = "+dynamo"
+os.environ["TORCHDYNAMO_VERBOSE"] = "1"
+os.environ["TORCH_COMPILE_DEBUG"] = "1"
+os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
+os.environ["TORCH_COMPILE"] = "0"  # Disable torch.compile
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
         device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Initializing model on device: {device}")
         model_path = os.path.abspath(os.path.join('/app/src', 'models'))
         logger.info(f"Loading models from: {model_path}")
         if not os.path.exists(model_path):
             raise RuntimeError(f"Model path {model_path} does not exist")
         model_files = os.listdir(model_path)
         logger.info(f"Available model files: {model_files}")
         )
     try:
         logger.info(f"Received inference request with sample rate: {request.sample_rate}")
         audio_bytes = base64.b64decode(request.audio_data)
         audio_array = np.load(io.BytesIO(audio_bytes))
         logger.info(f"Decoded audio array shape: {audio_array.shape}, dtype: {audio_array.dtype}")
         if len(audio_array.shape) != 2:
             raise ValueError(f"Expected 2D audio array [C,T], got shape {audio_array.shape}")
         result = model.inference(audio_array, request.sample_rate)
         logger.info(f"Inference complete. Output shape: {result['audio'].shape}")
         buffer = io.BytesIO()
         np.save(buffer, result['audio'])
         audio_b64 = base64.b64encode(buffer.getvalue()).decode()