Spaces:
Running on Zero
Running on Zero
Commit ·
a3b50ab
1
Parent(s): 8c0b009
Move the moshi model imports below the example personas, and load the text tokenizer once at module level instead of inside the get_models function
Browse files
app.py
CHANGED
|
@@ -9,10 +9,6 @@ from typing import Optional
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
import sentencepiece
|
| 11 |
|
| 12 |
-
# PersonaPlex model imports - installed via: pip install git+https://github.com/NVIDIA/personaplex.git#subdirectory=moshi
|
| 13 |
-
from moshi.models import loaders, LMGen
|
| 14 |
-
from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
|
| 15 |
-
|
| 16 |
# Configuration
|
| 17 |
HF_REPO = "nvidia/personaplex-7b-v1"
|
| 18 |
DEVICE = "cuda"
|
|
@@ -34,6 +30,10 @@ EXAMPLE_PERSONAS = [
|
|
| 34 |
"You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
|
| 35 |
]
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# Pre-download model weights at startup (cached by huggingface_hub)
|
| 38 |
print("Downloading model weights...")
|
| 39 |
MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
|
|
@@ -49,6 +49,9 @@ if not VOICES_DIR.exists():
|
|
| 49 |
tar.extractall(path=Path(VOICES_TGZ).parent)
|
| 50 |
print("Model weights ready.")
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
# Global model cache - models loaded lazily inside @spaces.GPU
|
| 53 |
_model_cache = {}
|
| 54 |
|
|
@@ -67,9 +70,6 @@ def get_models():
|
|
| 67 |
lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
|
| 68 |
lm.eval()
|
| 69 |
|
| 70 |
-
# Load text tokenizer
|
| 71 |
-
text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
|
| 72 |
-
|
| 73 |
# Create LMGen wrapper
|
| 74 |
frame_size = int(mimi.sample_rate / mimi.frame_rate)
|
| 75 |
lm_gen = LMGen(
|
|
@@ -93,7 +93,6 @@ def get_models():
|
|
| 93 |
"mimi": mimi,
|
| 94 |
"other_mimi": other_mimi,
|
| 95 |
"lm_gen": lm_gen,
|
| 96 |
-
"text_tokenizer": text_tokenizer,
|
| 97 |
"frame_size": frame_size,
|
| 98 |
"initialized": True,
|
| 99 |
})
|
|
@@ -131,7 +130,6 @@ def generate_response(audio_input, persona: str, voice: str):
|
|
| 131 |
mimi = models["mimi"]
|
| 132 |
other_mimi = models["other_mimi"]
|
| 133 |
lm_gen = models["lm_gen"]
|
| 134 |
-
text_tokenizer = models["text_tokenizer"]
|
| 135 |
frame_size = models["frame_size"]
|
| 136 |
|
| 137 |
# Process input audio
|
|
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
import sentencepiece
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# Configuration
|
| 13 |
HF_REPO = "nvidia/personaplex-7b-v1"
|
| 14 |
DEVICE = "cuda"
|
|
|
|
| 30 |
"You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
|
| 31 |
]
|
| 32 |
|
| 33 |
+
# Import moshi only after the `spaces` package has been imported, so that `spaces` can intercept it (presumably its torch/CUDA setup - confirm)
|
| 34 |
+
from moshi.models import loaders, LMGen
|
| 35 |
+
from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
|
| 36 |
+
|
| 37 |
# Pre-download model weights at startup (cached by huggingface_hub)
|
| 38 |
print("Downloading model weights...")
|
| 39 |
MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
|
|
|
|
| 49 |
tar.extractall(path=Path(VOICES_TGZ).parent)
|
| 50 |
print("Model weights ready.")
|
| 51 |
|
| 52 |
+
# Load text tokenizer (CPU only, no CUDA needed)
|
| 53 |
+
text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
|
| 54 |
+
|
| 55 |
# Global model cache - models loaded lazily inside @spaces.GPU
|
| 56 |
_model_cache = {}
|
| 57 |
|
|
|
|
| 70 |
lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
|
| 71 |
lm.eval()
|
| 72 |
|
|
|
|
|
|
|
|
|
|
| 73 |
# Create LMGen wrapper
|
| 74 |
frame_size = int(mimi.sample_rate / mimi.frame_rate)
|
| 75 |
lm_gen = LMGen(
|
|
|
|
| 93 |
"mimi": mimi,
|
| 94 |
"other_mimi": other_mimi,
|
| 95 |
"lm_gen": lm_gen,
|
|
|
|
| 96 |
"frame_size": frame_size,
|
| 97 |
"initialized": True,
|
| 98 |
})
|
|
|
|
| 130 |
mimi = models["mimi"]
|
| 131 |
other_mimi = models["other_mimi"]
|
| 132 |
lm_gen = models["lm_gen"]
|
|
|
|
| 133 |
frame_size = models["frame_size"]
|
| 134 |
|
| 135 |
# Process input audio
|