VoxSum / src /server /models /transcription.py
Luigi's picture
add non-tracked
9cd7aca
raw
history blame contribute delete
592 Bytes
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, Field
class DiarizationOptions(BaseModel):
    # Settings block for the diarization step of a transcription request.
    # Every field has a default, so an empty payload validates.

    # Diarization is off unless the caller explicitly switches it on.
    enable: bool = Field(default=False)

    # Accepted range is -1..10 inclusive. The -1 default presumably means
    # "let the backend decide the speaker count" -- TODO confirm with the
    # code that consumes this model.
    num_speakers: int = Field(
        default=-1,
        ge=-1,
        le=10,
    )

    # Accepted range is 0.1..1.0 inclusive; defaults to 0.5.
    cluster_threshold: float = Field(
        default=0.5,
        ge=0.1,
        le=1.0,
    )
class TranscriptionRequest(BaseModel):
    # Validated parameters for a single transcription request.
    # `model_name` is the only required field; everything else has a default.

    # Restricted to the two listed backends; "sensevoice" when omitted.
    backend: Literal["moonshine", "sensevoice"] = Field(default="sensevoice")

    # Required, free-form string (no validation beyond the type).
    # NOTE(review): some pydantic v2 releases warn about fields starting with
    # "model_" (protected namespace) -- confirm against the pinned version.
    model_name: str

    # Accepted range is 0.05..0.95 inclusive; defaults to 0.5.
    vad_threshold: float = Field(
        default=0.5,
        ge=0.05,
        le=0.95,
    )

    # Any string is accepted; "auto" when omitted.
    language: str = Field(default="auto")

    # Restricted to "withitn" / "noitn" -- presumably toggles inverse text
    # normalization in the backend; verify against the consumer.
    textnorm: Literal["withitn", "noitn"] = Field(default="withitn")

    # Nested diarization settings; falls back to the all-defaults options.
    diarization: DiarizationOptions = DiarizationOptions()