Upload folder using huggingface_hub

Files changed:
- docker-compose.yml (+47, -0)
- inference.py (+3, -2)
docker-compose.yml
ADDED
@@ -0,0 +1,47 @@
+services:
+  miner_test_model:
+    build:
+      # context: /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general
+      dockerfile: Dockerfile
+    container_name: miner_test_model
+    deploy:
+      resources:
+        limits:
+          cpus: '4.0'
+          memory: 8G
+        reservations:
+          devices:
+            - capabilities:
+                - gpu
+                - utility
+                - compute
+              count: all
+              driver: nvidia
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - PYTHONUNBUFFERED=1
+      - MODEL_ID=tezuesh/moshi_general
+      - REPO_ID=tezuesh/moshi_general
+      - CUDA_VISIBLE_DEVICES=0
+    healthcheck:
+      interval: 10s
+      retries: 3
+      start_period: 20s
+      test:
+        - CMD
+        - curl
+        - -f
+        - http://localhost:8000/api/v1/health
+      timeout: 5s
+    image: miner_test_model:latest
+    ports:
+      - 8000:8000
+    restart: unless-stopped
+    shm_size: 2gb
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    volumes:
+      - /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general:/app/src:ro
+version: '3'
|
inference.py
CHANGED
@@ -144,7 +144,8 @@ class InferenceRecipe:
                 tokens = self.lm_gen.step(codes[:, :, 0:1])
                 if tokens is not None:
                     _ = self.mimi.decode(tokens[:, 1:])
-
+
+            torch.cuda.synchronize()
             logger.info("Warmup pass completed")

         except Exception as e:
@@ -198,7 +199,7 @@ class InferenceRecipe:
             dict: Contains generated audio array and optional transcribed text
         """
         try:
-            logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate}Hz")
+            logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate} Hz, self device: {self.device}")

             # Load and preprocess audio
             wav = self._load_audio(audio_array, sample_rate)