Spaces:
Running
Running
Refactor LLM and voice provider settings, enhance configuration options, and update dependencies
Browse files
- .env.example +7 -14
- pyproject.toml +2 -2
- src/agent/llm_factory.py +48 -71
- src/core/settings.py +3 -17
- src/models/voice/factory.py +0 -41
- src/models/voice/nvidia.py +0 -91
- uv.lock +2 -2
.env.example
CHANGED
|
@@ -1,25 +1,18 @@
|
|
| 1 |
-
|
| 2 |
-
LLM_PROVIDER=nvidia # or "huggingface"
|
| 3 |
NVIDIA_API_KEY=your_nvidia_api_key_here
|
| 4 |
NVIDIA_MODEL=meta/llama-3.1-8b-instruct
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
#
|
| 8 |
-
# HF_MODEL=microsoft/DialoGPT-medium
|
| 9 |
-
# HF_TOKEN=your_huggingface_token_here # Get from: https://huggingface.co/settings/tokens
|
| 10 |
-
# HF_USE_INFERENCE_API=false # true to use the Hugging Face Inference API, false to run locally
|
| 11 |
-
# HF_TRUST_REMOTE_CODE=false # Enable when the repo requires custom model/tokenizer code
|
| 12 |
-
# HF_USE_FAST_TOKENIZER=false # Set to true when you need the fast tokenizer; disable to avoid legacy conversion issues
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
# Voice Provider
|
| 16 |
-
# NVIDIA API uses the same NVIDIA_API_KEY as the LLM provider
|
| 17 |
VOICE_PROVIDER=nvidia
|
| 18 |
-
|
| 19 |
-
# NVIDIA Voice Settings (default)
|
| 20 |
NVIDIA_VOICE_LANGUAGE=en-US
|
| 21 |
NVIDIA_VOICE_NAME=Magpie-Multilingual.EN-US.Aria
|
| 22 |
|
| 23 |
# NVIDIA TTS requires an endpoint from build.nvidia.com
|
| 24 |
-
# Get your TTS endpoint from: https://build.nvidia.com/
|
| 25 |
NVIDIA_TTS_ENDPOINT=https://your-tts-endpoint-here
|
|
|
|
| 1 |
+
# NVIDIA LLM Settings
|
|
|
|
| 2 |
NVIDIA_API_KEY=your_nvidia_api_key_here
|
| 3 |
NVIDIA_MODEL=meta/llama-3.1-8b-instruct
|
| 4 |
|
| 5 |
+
# HuggingFace Settings
|
| 6 |
+
HF_TOKEN=your_huggingface_token_here # Get from: https://huggingface.co/settings/tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# LLM Parameters
|
| 9 |
+
LLM_TEMPERATURE=0.7
|
| 10 |
+
LLM_MAX_TOKENS=1024
|
| 11 |
|
| 12 |
+
# Voice Provider Settings
|
|
|
|
| 13 |
VOICE_PROVIDER=nvidia
|
|
|
|
|
|
|
| 14 |
NVIDIA_VOICE_LANGUAGE=en-US
|
| 15 |
NVIDIA_VOICE_NAME=Magpie-Multilingual.EN-US.Aria
|
| 16 |
|
| 17 |
# NVIDIA TTS requires an endpoint from build.nvidia.com
|
|
|
|
| 18 |
NVIDIA_TTS_ENDPOINT=https://your-tts-endpoint-here
|
pyproject.toml
CHANGED
|
@@ -16,10 +16,10 @@ dependencies = [
|
|
| 16 |
"python-dotenv>=1.0.0",
|
| 17 |
"python-multipart>=0.0.22",
|
| 18 |
"streamlit>=1.53.1",
|
| 19 |
-
"transformers",
|
| 20 |
-
"torch",
|
| 21 |
"uvicorn[standard]>=0.40.0",
|
| 22 |
"websockets>=16.0",
|
|
|
|
|
|
|
| 23 |
"accelerate>=1.12.0",
|
| 24 |
]
|
| 25 |
|
|
|
|
| 16 |
"python-dotenv>=1.0.0",
|
| 17 |
"python-multipart>=0.0.22",
|
| 18 |
"streamlit>=1.53.1",
|
|
|
|
|
|
|
| 19 |
"uvicorn[standard]>=0.40.0",
|
| 20 |
"websockets>=16.0",
|
| 21 |
+
"transformers>=4.32.0",
|
| 22 |
+
"torch>=2.1.1",
|
| 23 |
"accelerate>=1.12.0",
|
| 24 |
]
|
| 25 |
|
src/agent/llm_factory.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
-
from typing import Dict, Optional
|
| 2 |
-
|
| 3 |
-
from langchain_core.language_models import BaseLanguageModel
|
| 4 |
-
from langchain_huggingface import HuggingFaceEndpoint, HuggingFacePipeline
|
| 5 |
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
| 6 |
|
| 7 |
from src.core.logger import logger
|
|
@@ -9,83 +7,62 @@ from src.core.settings import settings
|
|
| 9 |
|
| 10 |
|
| 11 |
class LLMFactory:
|
| 12 |
-
_instances: Dict[str, BaseLanguageModel] = {}
|
| 13 |
-
|
| 14 |
-
@classmethod
|
| 15 |
-
def create_llm(cls, provider: Optional[str] = None) -> BaseLanguageModel:
|
| 16 |
-
provider = (provider or settings.llm.LLM_PROVIDER).lower()
|
| 17 |
-
|
| 18 |
-
if provider in cls._instances:
|
| 19 |
-
return cls._instances[provider]
|
| 20 |
-
|
| 21 |
-
if provider == "nvidia":
|
| 22 |
-
llm = cls._create_nvidia_llm()
|
| 23 |
-
elif provider == "huggingface":
|
| 24 |
-
llm = cls._create_huggingface_llm()
|
| 25 |
-
else:
|
| 26 |
-
raise ValueError(f"Unknown LLM provider: {provider}")
|
| 27 |
-
|
| 28 |
-
cls._instances[provider] = llm
|
| 29 |
-
return llm
|
| 30 |
-
|
| 31 |
-
@classmethod
|
| 32 |
-
def reset_cache(cls, provider: Optional[str] = None) -> None:
|
| 33 |
-
if provider:
|
| 34 |
-
cls._instances.pop(provider.lower(), None)
|
| 35 |
-
else:
|
| 36 |
-
cls._instances.clear()
|
| 37 |
-
|
| 38 |
@staticmethod
|
| 39 |
-
def
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
if not settings.llm.NVIDIA_API_KEY:
|
| 43 |
raise ValueError("NVIDIA_API_KEY must be set to use the NVIDIA LLM provider.")
|
| 44 |
|
| 45 |
return ChatNVIDIA(
|
| 46 |
-
model=settings.llm.NVIDIA_MODEL,
|
| 47 |
api_key=settings.llm.NVIDIA_API_KEY,
|
| 48 |
-
temperature=settings.llm.LLM_TEMPERATURE,
|
| 49 |
-
max_completion_tokens=settings.llm.LLM_MAX_TOKENS,
|
| 50 |
)
|
| 51 |
|
| 52 |
@staticmethod
|
| 53 |
-
def
|
| 54 |
-
model_id
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
if not settings.llm.HF_TOKEN or not settings.llm.HF_TOKEN.strip():
|
| 60 |
-
raise ValueError(
|
| 61 |
-
"HF_TOKEN must be provided when HF_USE_INFERENCE_API is true."
|
| 62 |
-
)
|
| 63 |
|
| 64 |
-
|
| 65 |
-
return HuggingFaceEndpoint(
|
| 66 |
-
repo_id=model_id,
|
| 67 |
-
huggingfacehub_api_token=settings.llm.HF_TOKEN,
|
| 68 |
-
temperature=settings.llm.LLM_TEMPERATURE,
|
| 69 |
-
max_new_tokens=settings.llm.LLM_MAX_TOKENS,
|
| 70 |
-
)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
"do_sample": True,
|
| 82 |
-
},
|
| 83 |
-
pipeline_kwargs={
|
| 84 |
-
"max_new_tokens": settings.llm.LLM_MAX_TOKENS,
|
| 85 |
-
"temperature": settings.llm.LLM_TEMPERATURE,
|
| 86 |
-
"do_sample": True,
|
| 87 |
-
"tokenizer_kwargs": {
|
| 88 |
-
"use_fast": settings.llm.HF_USE_FAST_TOKENIZER,
|
| 89 |
-
},
|
| 90 |
-
},
|
| 91 |
-
)
|
|
|
|
| 1 |
+
from huggingface_hub import InferenceClient
|
| 2 |
+
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
|
|
|
|
|
|
| 3 |
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
| 4 |
|
| 5 |
from src.core.logger import logger
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class LLMFactory:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@staticmethod
|
| 11 |
+
def create_nvidia_llm(
|
| 12 |
+
model: str = settings.llm.NVIDIA_MODEL,
|
| 13 |
+
temperature: float = settings.llm.LLM_TEMPERATURE,
|
| 14 |
+
max_tokens: int = settings.llm.LLM_MAX_TOKENS,
|
| 15 |
+
) -> ChatNVIDIA:
|
| 16 |
+
logger.info(f"Initializing NVIDIA LLM: {model}")
|
| 17 |
|
| 18 |
if not settings.llm.NVIDIA_API_KEY:
|
| 19 |
raise ValueError("NVIDIA_API_KEY must be set to use the NVIDIA LLM provider.")
|
| 20 |
|
| 21 |
return ChatNVIDIA(
|
| 22 |
+
model=model,
|
| 23 |
api_key=settings.llm.NVIDIA_API_KEY,
|
| 24 |
+
temperature=temperature,
|
| 25 |
+
max_completion_tokens=max_tokens,
|
| 26 |
)
|
| 27 |
|
| 28 |
@staticmethod
|
| 29 |
+
def create_huggingface_llm(
|
| 30 |
+
model_id: str,
|
| 31 |
+
provider: str = "auto",
|
| 32 |
+
temperature: float = settings.llm.LLM_TEMPERATURE,
|
| 33 |
+
max_tokens: int = settings.llm.LLM_MAX_TOKENS,
|
| 34 |
+
) -> ChatHuggingFace:
|
| 35 |
+
token = (settings.llm.HF_TOKEN or "").strip()
|
| 36 |
+
if not token:
|
| 37 |
+
raise ValueError("HF_TOKEN must be set to use the HuggingFace LLM provider.")
|
| 38 |
+
|
| 39 |
+
logger.info(f"Initializing HuggingFace LLM: {model_id} via provider={provider}")
|
| 40 |
+
|
| 41 |
+
llm = HuggingFaceEndpoint(
|
| 42 |
+
repo_id=model_id,
|
| 43 |
+
provider=provider,
|
| 44 |
+
huggingfacehub_api_token=token,
|
| 45 |
+
temperature=temperature,
|
| 46 |
+
max_new_tokens=max_tokens,
|
| 47 |
+
)
|
| 48 |
+
return ChatHuggingFace(llm=llm)
|
| 49 |
+
|
| 50 |
+
@staticmethod
|
| 51 |
+
def create_huggingface_stt(model_id: str | None = None) -> InferenceClient:
|
| 52 |
+
token = (settings.llm.HF_TOKEN or "").strip()
|
| 53 |
+
if not token:
|
| 54 |
+
raise ValueError("HF_TOKEN must be set to use the HuggingFace STT provider.")
|
| 55 |
|
| 56 |
+
logger.info(f"Initializing HuggingFace STT: {model_id or 'default'}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
return InferenceClient(model=model_id, token=token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
+
@staticmethod
|
| 61 |
+
def create_huggingface_tts(model_id: str | None = None) -> InferenceClient:
|
| 62 |
+
token = (settings.llm.HF_TOKEN or "").strip()
|
| 63 |
+
if not token:
|
| 64 |
+
raise ValueError("HF_TOKEN must be set to use the HuggingFace TTS provider.")
|
| 65 |
+
|
| 66 |
+
logger.info(f"Initializing HuggingFace TTS: {model_id or 'default'}")
|
| 67 |
+
|
| 68 |
+
return InferenceClient(model=model_id, token=token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/core/settings.py
CHANGED
|
@@ -2,7 +2,7 @@ import json
|
|
| 2 |
from pathlib import Path
|
| 3 |
from typing import Any, Optional
|
| 4 |
|
| 5 |
-
from pydantic import Field, ValidationError, field_validator
|
| 6 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
|
|
@@ -61,28 +61,14 @@ class VoiceSettings(CoreSettings):
|
|
| 61 |
|
| 62 |
|
| 63 |
class LLMSettings(CoreSettings):
|
| 64 |
-
LLM_PROVIDER: str = Field(default="nvidia")
|
| 65 |
-
|
| 66 |
NVIDIA_API_KEY: Optional[str] = Field(default=None)
|
| 67 |
NVIDIA_MODEL: str = Field(default="meta/llama-3.1-8b-instruct")
|
| 68 |
NVIDIA_BASE_URL: str = Field(default="https://integrate.api.nvidia.com/v1")
|
| 69 |
-
|
| 70 |
HF_TOKEN: Optional[str] = Field(default=None)
|
| 71 |
-
|
| 72 |
-
HF_USE_INFERENCE_API: bool = Field(default=False)
|
| 73 |
-
HF_TRUST_REMOTE_CODE: bool = Field(default=False)
|
| 74 |
-
HF_USE_FAST_TOKENIZER: bool = Field(default=False)
|
| 75 |
-
|
| 76 |
LLM_TEMPERATURE: float = Field(default=0.7, ge=0.0, le=2.0)
|
| 77 |
LLM_MAX_TOKENS: int = Field(default=1024, gt=0)
|
| 78 |
-
LLM_STREAMING: bool = Field(default=True)
|
| 79 |
-
|
| 80 |
-
@field_validator("LLM_PROVIDER")
|
| 81 |
-
@classmethod
|
| 82 |
-
def validate_provider(cls, v: str) -> str:
|
| 83 |
-
if v.lower() not in ["nvidia", "huggingface"]:
|
| 84 |
-
raise ValueError("LLM_PROVIDER must be 'nvidia' or 'huggingface'")
|
| 85 |
-
return v.lower()
|
| 86 |
|
| 87 |
|
| 88 |
class APISettings(CoreSettings):
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
from typing import Any, Optional
|
| 4 |
|
| 5 |
+
from pydantic import Field, ValidationError
|
| 6 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
class LLMSettings(CoreSettings):
|
|
|
|
|
|
|
| 64 |
NVIDIA_API_KEY: Optional[str] = Field(default=None)
|
| 65 |
NVIDIA_MODEL: str = Field(default="meta/llama-3.1-8b-instruct")
|
| 66 |
NVIDIA_BASE_URL: str = Field(default="https://integrate.api.nvidia.com/v1")
|
| 67 |
+
|
| 68 |
HF_TOKEN: Optional[str] = Field(default=None)
|
| 69 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
LLM_TEMPERATURE: float = Field(default=0.7, ge=0.0, le=2.0)
|
| 71 |
LLM_MAX_TOKENS: int = Field(default=1024, gt=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
class APISettings(CoreSettings):
|
src/models/voice/factory.py
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
from typing import Dict, Type, Callable
|
| 2 |
-
|
| 3 |
-
from src.core.logger import logger
|
| 4 |
-
from src.core.settings import settings
|
| 5 |
-
from src.models.voice.base import BaseVoiceProvider, VoiceProviderConfig
|
| 6 |
-
from src.models.voice.nvidia import NvidiaVoiceProvider, NvidiaConfig
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
class VoiceProviderFactory:
|
| 10 |
-
_registry: Dict[str, Callable[[VoiceProviderConfig], BaseVoiceProvider]] = {}
|
| 11 |
-
|
| 12 |
-
@classmethod
|
| 13 |
-
def register(cls, name: str, provider_class: Type[BaseVoiceProvider]) -> None:
|
| 14 |
-
cls._registry[name.lower()] = provider_class
|
| 15 |
-
logger.debug(f"Registered voice provider: {name}")
|
| 16 |
-
|
| 17 |
-
@classmethod
|
| 18 |
-
def create_provider(cls, provider_name: str = None) -> BaseVoiceProvider:
|
| 19 |
-
provider_name = provider_name or settings.voice.VOICE_PROVIDER
|
| 20 |
-
provider_name = provider_name.lower()
|
| 21 |
-
|
| 22 |
-
if provider_name not in cls._registry:
|
| 23 |
-
raise ValueError(f"Unknown voice provider: {provider_name}. Available: {list(cls._registry.keys())}")
|
| 24 |
-
|
| 25 |
-
logger.info(f"Creating voice provider: {provider_name}")
|
| 26 |
-
|
| 27 |
-
if provider_name == "nvidia":
|
| 28 |
-
config = NvidiaConfig(
|
| 29 |
-
api_key=settings.llm.NVIDIA_API_KEY,
|
| 30 |
-
language=settings.voice.NVIDIA_VOICE_LANGUAGE,
|
| 31 |
-
voice_name=settings.voice.NVIDIA_VOICE_NAME,
|
| 32 |
-
tts_model=settings.voice.NVIDIA_TTS_MODEL,
|
| 33 |
-
tts_endpoint=settings.voice.NVIDIA_TTS_ENDPOINT,
|
| 34 |
-
sample_rate_output=settings.voice.SAMPLE_RATE_OUTPUT,
|
| 35 |
-
)
|
| 36 |
-
return cls._registry[provider_name](config)
|
| 37 |
-
|
| 38 |
-
raise NotImplementedError(f"Configuration for {provider_name} not yet implemented")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
VoiceProviderFactory.register("nvidia", NvidiaVoiceProvider)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/models/voice/nvidia.py
DELETED
|
@@ -1,91 +0,0 @@
|
|
| 1 |
-
import asyncio
|
| 2 |
-
from typing import AsyncIterator, Optional
|
| 3 |
-
|
| 4 |
-
import httpx
|
| 5 |
-
|
| 6 |
-
from src.core.logger import logger
|
| 7 |
-
from src.models.voice.base import BaseVoiceProvider, VoiceProviderConfig
|
| 8 |
-
from src.models.voice.types import TranscriptionResult, VADInfo
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class NvidiaConfig(VoiceProviderConfig):
|
| 12 |
-
provider_name: str = "nvidia"
|
| 13 |
-
api_key: str
|
| 14 |
-
language: str = "en-US"
|
| 15 |
-
voice_name: str = "Magpie-Multilingual.EN-US.Aria"
|
| 16 |
-
tts_model: str = "magpie-tts-multilingual"
|
| 17 |
-
tts_endpoint: str = ""
|
| 18 |
-
sample_rate_output: int = 48000
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
class NvidiaVoiceProvider(BaseVoiceProvider):
|
| 22 |
-
def __init__(self, config: NvidiaConfig):
|
| 23 |
-
super().__init__(config)
|
| 24 |
-
self.config: NvidiaConfig = config
|
| 25 |
-
self._current_vad: Optional[VADInfo] = None
|
| 26 |
-
|
| 27 |
-
async def connect(self) -> None:
|
| 28 |
-
# No connection needed for HTTP API
|
| 29 |
-
self._connected = True
|
| 30 |
-
logger.info("NVIDIA API TTS provider ready")
|
| 31 |
-
|
| 32 |
-
async def disconnect(self) -> None:
|
| 33 |
-
self._connected = False
|
| 34 |
-
logger.info("NVIDIA API TTS provider disconnected")
|
| 35 |
-
|
| 36 |
-
async def text_to_speech(
|
| 37 |
-
self, text: str, stream: bool = True
|
| 38 |
-
) -> AsyncIterator[bytes]:
|
| 39 |
-
if not self.is_connected:
|
| 40 |
-
raise RuntimeError("NVIDIA API provider not connected")
|
| 41 |
-
|
| 42 |
-
if not self.config.tts_endpoint:
|
| 43 |
-
raise RuntimeError(
|
| 44 |
-
"TTS requires NVIDIA_TTS_ENDPOINT to be set. "
|
| 45 |
-
"Get a TTS endpoint from: https://build.nvidia.com/"
|
| 46 |
-
)
|
| 47 |
-
|
| 48 |
-
async for chunk in self._text_to_speech_http(text, stream):
|
| 49 |
-
yield chunk
|
| 50 |
-
|
| 51 |
-
async def _text_to_speech_http(
|
| 52 |
-
self, text: str, stream: bool = True
|
| 53 |
-
) -> AsyncIterator[bytes]:
|
| 54 |
-
endpoint = self.config.tts_endpoint.rstrip("/")
|
| 55 |
-
url = f"{endpoint}/v1/audio/synthesize"
|
| 56 |
-
|
| 57 |
-
try:
|
| 58 |
-
logger.debug(f"Generating speech via HTTP API for text: {text[:50]}...")
|
| 59 |
-
|
| 60 |
-
headers = {
|
| 61 |
-
"Authorization": f"Bearer {self.config.api_key}",
|
| 62 |
-
"Content-Type": "application/json"
|
| 63 |
-
}
|
| 64 |
-
|
| 65 |
-
payload = {
|
| 66 |
-
"language": self.config.language,
|
| 67 |
-
"text": text,
|
| 68 |
-
"voice": self.config.voice_name,
|
| 69 |
-
"sample_rate_hz": self.config.sample_rate_output,
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 73 |
-
response = await client.post(url, json=payload, headers=headers)
|
| 74 |
-
response.raise_for_status()
|
| 75 |
-
|
| 76 |
-
# For streaming, we need to handle the response appropriately
|
| 77 |
-
# For now, return the full content
|
| 78 |
-
yield response.content
|
| 79 |
-
|
| 80 |
-
logger.debug("HTTP TTS generation complete")
|
| 81 |
-
|
| 82 |
-
except httpx.HTTPStatusError as e:
|
| 83 |
-
logger.error(f"HTTP error in NVIDIA TTS API: {e.response.status_code} - {e.response.text}")
|
| 84 |
-
raise RuntimeError(f"NVIDIA TTS API error: {e.response.status_code}") from e
|
| 85 |
-
except Exception as e:
|
| 86 |
-
logger.error(f"Error in NVIDIA HTTP TTS API: {e}")
|
| 87 |
-
raise
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
async def get_vad_info(self) -> Optional[VADInfo]:
|
| 91 |
-
return self._current_vad
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
|
@@ -1099,8 +1099,8 @@ requires-dist = [
|
|
| 1099 |
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
| 1100 |
{ name = "python-multipart", specifier = ">=0.0.22" },
|
| 1101 |
{ name = "streamlit", specifier = ">=1.53.1" },
|
| 1102 |
-
{ name = "torch" },
|
| 1103 |
-
{ name = "transformers" },
|
| 1104 |
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.40.0" },
|
| 1105 |
{ name = "websockets", specifier = ">=16.0" },
|
| 1106 |
]
|
|
|
|
| 1099 |
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
| 1100 |
{ name = "python-multipart", specifier = ">=0.0.22" },
|
| 1101 |
{ name = "streamlit", specifier = ">=1.53.1" },
|
| 1102 |
+
{ name = "torch", specifier = ">=2.1.1" },
|
| 1103 |
+
{ name = "transformers", specifier = ">=4.32.0" },
|
| 1104 |
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.40.0" },
|
| 1105 |
{ name = "websockets", specifier = ">=16.0" },
|
| 1106 |
]
|