File size: 3,028 Bytes
ea8f8db
 
cf10bac
ea8f8db
1fe2fca
ea8f8db
 
 
 
 
 
 
 
 
 
 
 
1fe2fca
ea8f8db
 
 
 
cf10bac
 
 
 
ea8f8db
cf10bac
 
 
ba7bcd3
 
08a9522
ba7bcd3
468cac0
08a9522
 
 
 
 
ba7bcd3
08a9522
 
 
ba7bcd3
08a9522
 
19edd1b
08a9522
 
 
 
 
 
 
19edd1b
 
08a9522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52e8dd2
 
 
 
 
 
 
 
08a9522
 
 
 
 
 
19edd1b
 
08a9522
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import streamlit as st
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

def get_llm(model_name: str = "nvidia/nemotron-3-nano-30b-a3b"):
    """Build a ``ChatNVIDIA`` chat client for ``model_name``.

    The API key is read from the ``NV_API_KEY`` environment variable; when
    that is unset or empty, the Streamlit secret of the same name is used.

    Args:
        model_name: Identifier of the NVIDIA-hosted chat model to use.

    Returns:
        A ``ChatNVIDIA`` instance configured with temperature 0, seed 42,
        a 16384-token output cap and ``enable_thinking`` switched on through
        ``chat_template_kwargs``.

    Raises:
        ValueError: If no non-empty key is found in either location.
    """
    nv_key = os.getenv("NV_API_KEY")
    if not nv_key:
        # Environment came up empty: fall back to Streamlit secrets.
        if "NV_API_KEY" in st.secrets:
            nv_key = st.secrets["NV_API_KEY"]

    if nv_key:
        llm_options = {
            "model": model_name,
            "temperature": 0,
            "seed": 42,
            "max_tokens": 16384,
            "extra_body": {"chat_template_kwargs": {"enable_thinking": True}},
            "api_key": nv_key,
        }
        return ChatNVIDIA(**llm_options)

    raise ValueError("NVIDIA API Key not found in environment or secrets.")

def get_embeddings():
    """Build the ``NVIDIAEmbeddings`` client used by this module.

    The API key is read from the ``NV_API_KEY`` environment variable; when
    that is unset or empty, the Streamlit secret of the same name is used.

    Returns:
        An ``NVIDIAEmbeddings`` instance for the
        ``nvidia/llama-nemotron-embed-1b-v2`` model.

    Raises:
        ValueError: If no non-empty key is found in either location.
    """
    nv_key = os.getenv("NV_API_KEY")
    if not nv_key:
        # Environment came up empty: fall back to Streamlit secrets.
        if "NV_API_KEY" in st.secrets:
            nv_key = st.secrets["NV_API_KEY"]

    if nv_key:
        embedding_model = "nvidia/llama-nemotron-embed-1b-v2"
        return NVIDIAEmbeddings(model=embedding_model, api_key=nv_key)

    raise ValueError("NV_API_KEY not found in environment or secrets.")

def _strip_speaker_labels(script_text, speakers=("Alex", "Jamie")):
    """Flatten a podcast script into one string with speaker labels removed."""
    prefixes = tuple(f"{name}:" for name in speakers)
    spoken = []
    for raw_line in script_text.split("\n"):
        line = raw_line.strip()
        if line.startswith(prefixes):
            # Keep only what follows the first colon (the spoken part).
            line = line.split(":", 1)[1].strip()
        # Skip blank lines and label-only lines so the joined text has no
        # empty fragments.
        if line:
            spoken.append(line)
    return " ".join(spoken)


def generate_podcast_audio(script_text: str):
    """Generate podcast audio using NVIDIA Riva TTS hosted API (magpie-tts-multilingual).

    Single-voice synthesis: speaker labels (Alex:/Jamie:) are removed and the
    remaining lines are joined into one utterance.

    The API key is read from the ``NV_API_KEY`` environment variable, falling
    back to the Streamlit secret of the same name.

    Args:
        script_text: Podcast script, one utterance per line, optionally
            prefixed with ``Alex:`` or ``Jamie:``.

    Returns:
        PCM audio bytes (16-bit, mono, 22050 Hz), or None when the key is
        missing, the script contains no speakable text, or synthesis fails.
        Failures are printed rather than raised (best-effort behaviour).
    """
    api_key = os.getenv("NV_API_KEY")
    if not api_key and "NV_API_KEY" in st.secrets:
        api_key = st.secrets["NV_API_KEY"]

    if not api_key:
        print("NV_API_KEY not found for TTS")
        return None

    try:
        # Remove speaker labels for single-voice synthesis.
        clean_text = _strip_speaker_labels(script_text or "")
        if not clean_text:
            # Nothing to say: avoid a pointless round-trip to the service.
            print("No speakable text in podcast script; skipping TTS")
            return None

        # Imported lazily so the rest of the module works without the Riva
        # client installed. The client exposes the TTS wrapper as
        # SpeechSynthesisService.
        from riva.client import Auth, AudioEncoding, SpeechSynthesisService

        # Setup authentication for NVIDIA hosted Riva TTS
        metadata = [
            ("function-id", "877104f7-e885-42b9-8de8-f6e4c6303969"),
            ("authorization", f"Bearer {api_key}"),
        ]

        # Keyword arguments keep this independent of Auth's positional order.
        auth = Auth(
            use_ssl=True,
            uri="grpc.nvcf.nvidia.com:443",
            metadata_args=metadata,
        )
        tts_service = SpeechSynthesisService(auth)

        # Call TTS API - use keyword args matching gRPC SynthesizeSpeechRequest
        resp = tts_service.synthesize(
            text=clean_text,
            language_code="en-US",
            encoding=AudioEncoding.LINEAR_PCM,
            sample_rate_hz=22050,
            voice_name="Magpie-Multilingual.EN-US.Aria",
        )

        # resp.audio contains PCM bytes (16-bit, mono)
        return resp.audio

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue without audio.
        print(f"NVIDIA Riva TTS failed: {e}")
        import traceback
        traceback.print_exc()
        return None