| |
|
|
| import torch |
| import numpy as np |
| from huggingface_hub import hf_hub_download |
| from moshi.models import loaders, LMGen |
| import sentencepiece |
|
|
# Hugging Face Hub repository holding the PersonaPlex checkpoints
# (Mimi codec weights, Moshi LM weights, text tokenizer, voices archive).
HF_REPO = "nvidia/personaplex-7b-v1"
# Device all model weights and tensors are placed on.
DEVICE = "cuda"


# Stem of the voice-prompt embedding file ("<VOICE>.pt") expected inside
# the extracted voices.tgz archive.
VOICE = "NATM2"
# System persona injected as the text prompt before generation starts.
PERSONA = (
    "You are a warm, friendly male assistant. "
    "Speak naturally, be engaging, supportive, and conversational."
)
|
|
class PersonaPlexBrain:
    """PersonaPlex speech-to-speech stack.

    Wires together two Mimi audio codecs (one per direction, so each keeps
    its own streaming state), the Moshi language model wrapped in ``LMGen``,
    a SentencePiece text tokenizer, a voice-prompt embedding, and the text
    persona prompt.
    """

    def __init__(self):
        self._load_models()

    def _load_models(self):
        """Download all artifacts from the HF Hub and build the model stack."""
        # Function-local imports kept from the original; `os` was imported
        # but never used and has been dropped.
        import tarfile
        import pathlib

        mimi_w = hf_hub_download(HF_REPO, loaders.MIMI_NAME)
        moshi_w = hf_hub_download(HF_REPO, loaders.MOSHI_NAME)
        tok_path = hf_hub_download(HF_REPO, loaders.TEXT_TOKENIZER_NAME)
        voices = hf_hub_download(HF_REPO, "voices.tgz")

        self.tokenizer = sentencepiece.SentencePieceProcessor(tok_path)

        # Two independent Mimi instances from the same weights: one encodes
        # the incoming (user) audio, the other decodes the generated audio,
        # so their streaming states never interfere.
        self.mimi = loaders.get_mimi(mimi_w, DEVICE)
        self.other_mimi = loaders.get_mimi(mimi_w, DEVICE)

        lm = loaders.get_moshi_lm(moshi_w, device=DEVICE)
        lm.eval()  # inference only; gradients are disabled per-frame below

        self.lm_gen = LMGen(
            lm,
            sample_rate=self.mimi.sample_rate,
            device=DEVICE,
            frame_rate=self.mimi.frame_rate,
        )

        # Extract the voice archive next to the downloaded file; skip the
        # extraction when a previous run already produced the directory.
        vdir = pathlib.Path(voices).parent / "voices"
        if not vdir.exists():
            with tarfile.open(voices) as t:
                # NOTE(review): extractall() on a downloaded archive is
                # vulnerable to path traversal from a malicious tar; on
                # Python 3.12+ pass filter="data" here.
                t.extractall(vdir.parent)

        voice_path = vdir / f"{VOICE}.pt"
        self.lm_gen.load_voice_prompt_embeddings(str(voice_path))

        # Prime the LM with the persona wrapped in <system> markers
        # (string format kept exactly as the model expects it).
        text = f"<system> {PERSONA} <system>"
        self.lm_gen.text_prompt_tokens = self.tokenizer.encode(text)

    @torch.inference_mode()
    def process_audio_frame(self, frame: np.ndarray):
        """Process one audio frame → return response frame.

        Runs under ``torch.inference_mode()``: the original built autograd
        graphs on every streamed frame despite ``lm.eval()``, leaking memory
        and adding latency.

        Args:
            frame: 1-D float array holding one frame of mono PCM — assumed
                to be at the Mimi sample rate; confirm against the caller.

        Returns:
            1-D float32 numpy array with the response audio, or ``None``
            while ``LMGen.step`` has not produced output yet.
        """
        # Shape to (batch=1, channels=1, samples) for the codec.
        x = torch.from_numpy(frame).float().to(DEVICE)[None, None, :]
        codes = self.mimi.encode(x)

        # Feed a single timestep of codes; step() yields None during warm-up.
        tokens = self.lm_gen.step(codes[:, :, 0:1])
        if tokens is None:
            return None

        # Channels 1..8 are taken as audio codebooks (channel 0 is skipped —
        # presumably the text stream; verify against LMGen's output layout).
        audio_tokens = tokens[:, 1:9, :]
        pcm = self.other_mimi.decode(audio_tokens)

        return pcm[0, 0].cpu().numpy()