new-api / dto /audio.go
liuzhao521
Deploy New API v0.9.25+ (commit b47cf4ef) to HuggingFace Spaces
4674012
package dto
import (
"encoding/json"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
type AudioRequest struct {
Model string `json:"model"`
Input string `json:"input"`
Voice string `json:"voice"`
Instructions string `json:"instructions,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
Speed float64 `json:"speed,omitempty"`
StreamFormat string `json:"stream_format,omitempty"`
Metadata json.RawMessage `json:"metadata,omitempty"`
}
func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
meta := &types.TokenCountMeta{
CombineText: r.Input,
TokenType: types.TokenTypeTextNumber,
}
return meta
}
func (r *AudioRequest) IsStream(c *gin.Context) bool {
return false
}
func (r *AudioRequest) SetModelName(modelName string) {
if modelName != "" {
r.Model = modelName
}
}
type AudioResponse struct {
Text string `json:"text"`
}
type WhisperVerboseJSONResponse struct {
Task string `json:"task,omitempty"`
Language string `json:"language,omitempty"`
Duration float64 `json:"duration,omitempty"`
Text string `json:"text,omitempty"`
Segments []Segment `json:"segments,omitempty"`
}
type Segment struct {
Id int `json:"id"`
Seek int `json:"seek"`
Start float64 `json:"start"`
End float64 `json:"end"`
Text string `json:"text"`
Tokens []int `json:"tokens"`
Temperature float64 `json:"temperature"`
AvgLogprob float64 `json:"avg_logprob"`
CompressionRatio float64 `json:"compression_ratio"`
NoSpeechProb float64 `json:"no_speech_prob"`
}