Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -498,6 +498,69 @@ async def presentation_chat(query: PresentationChatModel, background_tasks: Back
|
|
| 498 |
})
|
| 499 |
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
# ============================================================================
|
| 502 |
# Main Execution
|
| 503 |
# ============================================================================
|
|
|
|
| 498 |
})
|
| 499 |
|
| 500 |
|
| 501 |
+
# ============================================================================
|
| 502 |
+
# AUDIO ENDPOINTS
|
| 503 |
+
# ============================================================================
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
from enum import Enum
|
| 508 |
+
import io
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
# Shared OpenAI SDK client used by the audio (TTS) endpoints below.
# NOTE(review): OpenAI() with no arguments presumably reads OPENAI_API_KEY
# from the environment (the SDK's default) — confirm deployment config.
openai_client = OpenAI()
|
| 512 |
+
|
| 513 |
+
class OpenaiTTSModels:
    """Namespace for the closed value sets accepted by OpenAI's TTS API.

    Each inner class is a ``str``-backed :class:`Enum`, so members compare
    equal to (and serialize as) their plain string values.
    """

    class ModelType(str, Enum):
        """Speech-synthesis models selectable by the caller."""

        tts_1_hd = "tts-1-hd"
        tts_1 = "tts-1"

    class VoiceType(str, Enum):
        """Built-in voices offered by the TTS endpoint."""

        alloy = "alloy"
        echo = "echo"
        fable = "fable"
        onyx = "onyx"
        nova = "nova"
        shimmer = "shimmer"

    class OutputFormat(str, Enum):
        """Audio container / encoding formats the API can return."""

        mp3 = "mp3"
        opus = "opus"
        aac = "aac"
        flac = "flac"
        wav = "wav"
        pcm = "pcm"
|
| 533 |
+
|
| 534 |
+
class AudioAPI:
    """Namespace for the request schemas used by the audio endpoints."""

    class TTSRequest(BaseModel):
        """Payload for text-to-speech: what to say, with which model/voice/format."""

        model: OpenaiTTSModels.ModelType = Field(..., description="The TTS model to use")
        voice: OpenaiTTSModels.VoiceType = Field(..., description="The voice type for speech synthesis")
        input: str = Field(..., description="The text to convert to speech")
        output_format: OpenaiTTSModels.OutputFormat = Field(
            default=OpenaiTTSModels.OutputFormat.mp3,
            description="The audio output format",
        )
|
| 540 |
+
|
| 541 |
+
# FIX: route path was "v2/tts" — Starlette/FastAPI require paths to start
# with "/", and assert on it at startup, so the original registration crashed.
@app.post("/v2/tts")
async def text_to_speech(request: AudioAPI.TTSRequest, api_key: str = Depends(get_api_key)):
    """
    Convert text to speech using OpenAI's TTS API with real-time audio streaming.
    Requires API Key authentication via X-API-Key header.

    Returns a StreamingResponse whose media type is derived from the requested
    output format; any failure from the upstream OpenAI call is surfaced as
    HTTP 500 with the exception text as detail.
    """
    try:
        response = openai_client.audio.speech.create(
            model=request.model,
            voice=request.voice,
            input=request.input,
            response_format=request.output_format,
        )

        # Derive the MIME type from the chosen format (e.g. "audio/mp3").
        # NOTE: this also yields "audio/pcm" for PCM, so the original
        # explicit pcm special-case was redundant and has been folded in.
        content_type = f"audio/{request.output_format.value}"

        # Wrap the fully-received bytes in a BytesIO so FastAPI can stream
        # them to the client without re-buffering.
        return StreamingResponse(io.BytesIO(response.content), media_type=content_type)

    except Exception as e:
        # Surface upstream errors (auth, network, bad parameters) as a 500.
        raise HTTPException(status_code=500, detail=str(e))
|
| 563 |
+
|
| 564 |
# ============================================================================
|
| 565 |
# Main Execution
|
| 566 |
# ============================================================================
|