# NOTE: file-viewer residue (status text, file size of 3,028 bytes, commit
# hashes, and the line-number gutter) was fused onto the first import line.
import os
import streamlit as st
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
def get_llm(model_name: str = "nvidia/nemotron-3-nano-30b-a3b"):
    """Build a ``ChatNVIDIA`` chat client for ``model_name``.

    The API key is taken from the ``NV_API_KEY`` environment variable. When
    that is unset or empty, the ``NV_API_KEY`` entry of ``st.secrets`` is used
    instead, if present.

    Args:
        model_name: Model identifier passed to ``ChatNVIDIA`` as ``model``.

    Returns:
        A ``ChatNVIDIA`` instance configured with temperature 0, seed 42 and
        a 16384-token output cap.

    Raises:
        ValueError: If neither the environment nor the secrets yield a key.
    """
    key = os.getenv("NV_API_KEY")
    if not key:
        # Environment came up empty: fall back to the Streamlit secrets store.
        if "NV_API_KEY" in st.secrets:
            key = st.secrets["NV_API_KEY"]
    if not key:
        raise ValueError("NVIDIA API Key not found in environment or secrets.")

    client_options = {
        "model": model_name,
        "temperature": 0,
        "seed": 42,
        "max_tokens": 16384,
        # Sent verbatim in the request body; presumably switches on the
        # model's reasoning mode -- confirm against the model card.
        "extra_body": {"chat_template_kwargs": {"enable_thinking": True}},
        "api_key": key,
    }
    return ChatNVIDIA(**client_options)
def get_embeddings():
    """Build an ``NVIDIAEmbeddings`` client for the fixed embedding model.

    The API key is taken from the ``NV_API_KEY`` environment variable. When
    that is unset or empty, the ``NV_API_KEY`` entry of ``st.secrets`` is used
    instead, if present.

    Returns:
        An ``NVIDIAEmbeddings`` instance for
        ``nvidia/llama-nemotron-embed-1b-v2``.

    Raises:
        ValueError: If neither the environment nor the secrets yield a key.
    """
    key = os.getenv("NV_API_KEY")
    if not key:
        # Environment came up empty: fall back to the Streamlit secrets store.
        if "NV_API_KEY" in st.secrets:
            key = st.secrets["NV_API_KEY"]
    if not key:
        raise ValueError("NV_API_KEY not found in environment or secrets.")

    embedding_model = "nvidia/llama-nemotron-embed-1b-v2"
    return NVIDIAEmbeddings(model=embedding_model, api_key=key)
def _strip_speaker_labels(script_text: str) -> str:
    """Join the non-empty script lines into one string, minus Alex:/Jamie: labels."""
    spoken = []
    for raw_line in script_text.split('\n'):
        line = raw_line.strip()
        if line.startswith(("Alex:", "Jamie:")):
            # Keep only what follows the first colon (the spoken text).
            line = line.split(':', 1)[1].strip()
        if line:
            # Blank lines and label-only lines contribute nothing to speak.
            spoken.append(line)
    return ' '.join(spoken)


def generate_podcast_audio(script_text: str):
    """Generate podcast audio using NVIDIA Riva TTS hosted API (magpie-tts-multilingual).

    Single-voice synthesis: speaker labels (Alex:/Jamie:) are removed and the
    remaining lines are joined with spaces into one synthesis request.

    Args:
        script_text: Podcast script, one utterance per line, optionally
            prefixed with ``Alex:`` or ``Jamie:``.

    Returns:
        The ``audio`` field of the synthesis response (requested as
        LINEAR_PCM at 22050 Hz; 16-bit mono per the original author's note),
        or None when the API key is missing, the script has no text to speak,
        or the TTS call fails for any reason.
    """
    api_key = os.getenv("NV_API_KEY")
    if not api_key and "NV_API_KEY" in st.secrets:
        api_key = st.secrets["NV_API_KEY"]
    if not api_key:
        print("NV_API_KEY not found for TTS")
        return None

    # Clean the script before touching the network: an empty request can only
    # fail remotely, so bail out early instead.
    clean_text = _strip_speaker_labels(script_text or "")
    if not clean_text:
        print("No text to synthesize for TTS")
        return None

    try:
        # Imported lazily so the rest of the module works without the Riva
        # client installed; an ImportError is reported like any TTS failure.
        from riva.client import Auth
        # NOTE(review): riva.client is generally known to export
        # SpeechSynthesisService -- confirm TTSService exists in the
        # installed client version.
        from riva.client import TTSService

        # Setup authentication for NVIDIA hosted Riva TTS
        metadata = [
            ("function-id", "877104f7-e885-42b9-8de8-f6e4c6303969"),
            ("authorization", f"Bearer {api_key}"),
        ]
        auth = Auth(None, True, "grpc.nvcf.nvidia.com:443", metadata)
        tts_service = TTSService(auth)

        # Call TTS API - use keyword args matching gRPC SynthesizeSpeechRequest
        resp = tts_service.synthesize(
            text=clean_text,
            language_code="en-US",
            encoding=1,  # LINEAR_PCM
            sample_rate_hz=22050,
            voice_name="Magpie-Multilingual.EN-US.Aria",
        )
        # resp.audio contains PCM bytes (16-bit, mono)
        return resp.audio
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue without audio.
        print(f"NVIDIA Riva TTS failed: {e}")
        import traceback
        traceback.print_exc()
        return None
|