MohamedRashad committed on
Commit
a3b50ab
·
1 Parent(s): 8c0b009

Reorganize imports for moshi models and streamline text tokenizer loading in get_models function

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -9,10 +9,6 @@ from typing import Optional
9
  from huggingface_hub import hf_hub_download
10
  import sentencepiece
11
 
12
- # PersonaPlex model imports - installed via: pip install git+https://github.com/NVIDIA/personaplex.git#subdirectory=moshi
13
- from moshi.models import loaders, LMGen
14
- from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
15
-
16
  # Configuration
17
  HF_REPO = "nvidia/personaplex-7b-v1"
18
  DEVICE = "cuda"
@@ -34,6 +30,10 @@ EXAMPLE_PERSONAS = [
34
  "You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
35
  ]
36
 
 
 
 
 
37
  # Pre-download model weights at startup (cached by huggingface_hub)
38
  print("Downloading model weights...")
39
  MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
@@ -49,6 +49,9 @@ if not VOICES_DIR.exists():
49
  tar.extractall(path=Path(VOICES_TGZ).parent)
50
  print("Model weights ready.")
51
 
 
 
 
52
  # Global model cache - models loaded lazily inside @spaces.GPU
53
  _model_cache = {}
54
 
@@ -67,9 +70,6 @@ def get_models():
67
  lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
68
  lm.eval()
69
 
70
- # Load text tokenizer
71
- text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
72
-
73
  # Create LMGen wrapper
74
  frame_size = int(mimi.sample_rate / mimi.frame_rate)
75
  lm_gen = LMGen(
@@ -93,7 +93,6 @@ def get_models():
93
  "mimi": mimi,
94
  "other_mimi": other_mimi,
95
  "lm_gen": lm_gen,
96
- "text_tokenizer": text_tokenizer,
97
  "frame_size": frame_size,
98
  "initialized": True,
99
  })
@@ -131,7 +130,6 @@ def generate_response(audio_input, persona: str, voice: str):
131
  mimi = models["mimi"]
132
  other_mimi = models["other_mimi"]
133
  lm_gen = models["lm_gen"]
134
- text_tokenizer = models["text_tokenizer"]
135
  frame_size = models["frame_size"]
136
 
137
  # Process input audio
 
9
  from huggingface_hub import hf_hub_download
10
  import sentencepiece
11
 
 
 
 
 
12
  # Configuration
13
  HF_REPO = "nvidia/personaplex-7b-v1"
14
  DEVICE = "cuda"
 
30
  "You enjoy having a good conversation. Have a technical discussion about fixing a reactor core on a spaceship to Mars. You are an astronaut on a Mars mission. Your name is Alex.",
31
  ]
32
 
33
+ # Import moshi after spaces to allow interception
34
+ from moshi.models import loaders, LMGen
35
+ from moshi.models.lm import load_audio, _iterate_audio, encode_from_sphn
36
+
37
  # Pre-download model weights at startup (cached by huggingface_hub)
38
  print("Downloading model weights...")
39
  MIMI_WEIGHT = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
 
49
  tar.extractall(path=Path(VOICES_TGZ).parent)
50
  print("Model weights ready.")
51
 
52
+ # Load text tokenizer (CPU only, no CUDA needed)
53
+ text_tokenizer = sentencepiece.SentencePieceProcessor(TOKENIZER_PATH)
54
+
55
  # Global model cache - models loaded lazily inside @spaces.GPU
56
  _model_cache = {}
57
 
 
70
  lm = loaders.get_moshi_lm(MOSHI_WEIGHT, device=DEVICE)
71
  lm.eval()
72
 
 
 
 
73
  # Create LMGen wrapper
74
  frame_size = int(mimi.sample_rate / mimi.frame_rate)
75
  lm_gen = LMGen(
 
93
  "mimi": mimi,
94
  "other_mimi": other_mimi,
95
  "lm_gen": lm_gen,
 
96
  "frame_size": frame_size,
97
  "initialized": True,
98
  })
 
130
  mimi = models["mimi"]
131
  other_mimi = models["other_mimi"]
132
  lm_gen = models["lm_gen"]
 
133
  frame_size = models["frame_size"]
134
 
135
  # Process input audio