MohamedRashad committed on
Commit
a3b50ab
·
1 Parent(s): 8c0b009

Reorganize imports for moshi models and streamline text tokenizer loading in get_models function

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -9,10 +9,6 @@ from typing import Optional
9
  from huggingface_hub import hf_hub_download
10
  import sentencepiece
11
 
12
- # PersonaPlex model imports - installed via: pip install git+https://github.com/NVIDIA/personaplex.git#subdirectory=moshi
13
- from moshi.models import loaders, LMGen
14
- from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
15
-
16
  # Configuration
17
  HF_REPO = "nvidia/personaplex-7b-v1"
18
  DEVICE = "cuda"
@@ -34,6 +30,10 @@ EXAMPLE_PERSONAS = [
34
  "You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
35
  ]
36
 
 
 
 
 
37
  # Pre-download model weights at startup (cached by huggingface_hub)
38
  print("Downloading model weights...")
39
  MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
@@ -49,6 +49,9 @@ if not VOICES_DIR.exists():
49
  tar.extractall(path=Path(VOICES_TGZ).parent)
50
  print("Model weights ready.")
51
 
 
 
 
52
  # Global model cache - models loaded lazily inside @spaces.GPU
53
  _model_cache = {}
54
 
@@ -67,9 +70,6 @@ def get_models():
67
  lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
68
  lm.eval()
69
 
70
- # Load text tokenizer
71
- text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
72
-
73
  # Create LMGen wrapper
74
  frame_size = int(mimi.sample_rate / mimi.frame_rate)
75
  lm_gen = LMGen(
@@ -93,7 +93,6 @@ def get_models():
93
  "mimi": mimi,
94
  "other_mimi": other_mimi,
95
  "lm_gen": lm_gen,
96
- "text_tokenizer": text_tokenizer,
97
  "frame_size": frame_size,
98
  "initialized": True,
99
  })
@@ -131,7 +130,6 @@ def generate_response(audio_input, persona: str, voice: str):
131
  mimi = models["mimi"]
132
  other_mimi = models["other_mimi"]
133
  lm_gen = models["lm_gen"]
134
- text_tokenizer = models["text_tokenizer"]
135
  frame_size = models["frame_size"]
136
 
137
  # Process input audio
 
9
  from huggingface_hub import hf_hub_download
10
  import sentencepiece
11
 
 
 
 
 
12
  # Configuration
13
  HF_REPO = "nvidia/personaplex-7b-v1"
14
  DEVICE = "cuda"
 
30
  "You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
31
  ]
32
 
33
+ # Import moshi after spaces to allow interception
34
+ from moshi.models import loaders, LMGen
35
+ from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
36
+
37
  # Pre-download model weights at startup (cached by huggingface_hub)
38
  print("Downloading model weights...")
39
  MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
 
49
  tar.extractall(path=Path(VOICES_TGZ).parent)
50
  print("Model weights ready.")
51
 
52
+ # Load text tokenizer (CPU only, no CUDA needed)
53
+ text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
54
+
55
  # Global model cache - models loaded lazily inside @spaces.GPU
56
  _model_cache = {}
57
 
 
70
  lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
71
  lm.eval()
72
 
 
 
 
73
  # Create LMGen wrapper
74
  frame_size = int(mimi.sample_rate / mimi.frame_rate)
75
  lm_gen = LMGen(
 
93
  "mimi": mimi,
94
  "other_mimi": other_mimi,
95
  "lm_gen": lm_gen,
 
96
  "frame_size": frame_size,
97
  "initialized": True,
98
  })
 
130
  mimi = models["mimi"]
131
  other_mimi = models["other_mimi"]
132
  lm_gen = models["lm_gen"]
 
133
  frame_size = models["frame_size"]
134
 
135
  # Process input audio