Upload folder using huggingface_hub

Files changed:
- docker-compose.yml (+47, -0)
- inference.py (+3, -2)
docker-compose.yml
ADDED
@@ -0,0 +1,47 @@
+services:
+  miner_test_model:
+    build:
+      # context: /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general
+      dockerfile: Dockerfile
+    container_name: miner_test_model
+    deploy:
+      resources:
+        limits:
+          cpus: '4.0'
+          memory: 8G
+        reservations:
+          devices:
+            - capabilities:
+                - gpu
+                - utility
+                - compute
+              count: all
+              driver: nvidia
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - PYTHONUNBUFFERED=1
+      - MODEL_ID=tezuesh/moshi_general
+      - REPO_ID=tezuesh/moshi_general
+      - CUDA_VISIBLE_DEVICES=0
+    healthcheck:
+      interval: 10s
+      retries: 3
+      start_period: 20s
+      test:
+        - CMD
+        - curl
+        - -f
+        - http://localhost:8000/api/v1/health
+      timeout: 5s
+    image: miner_test_model:latest
+    ports:
+      - 8000:8000
+    restart: unless-stopped
+    shm_size: 2gb
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    volumes:
+      - /home/salman/tezuesh/omegalabs-anytoany-bittensor/sandboxing/NewApproach/cache/tezuesh_moshi_general:/app/src:ro
+version: '3'
|
inference.py
CHANGED
@@ -144,7 +144,8 @@ class InferenceRecipe:
                 tokens = self.lm_gen.step(codes[:, :, 0:1])
                 if tokens is not None:
                     _ = self.mimi.decode(tokens[:, 1:])
-
+
+            torch.cuda.synchronize()
             logger.info("Warmup pass completed")

         except Exception as e:
@@ -198,7 +199,7 @@ class InferenceRecipe:
             dict: Contains generated audio array and optional transcribed text
         """
         try:
-            logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate}Hz")
+            logger.info(f"Starting inference on {len(audio_array)} samples at {sample_rate} Hz, self device: {self.device}")

             # Load and preprocess audio
             wav = self._load_audio(audio_array, sample_rate)