Spaces:
No application file
No application file
| /** | |
| * Single TTS Generation API | |
| * | |
| * Generates TTS audio for a single text string and returns base64-encoded audio. | |
| * Called by the client in parallel for each speech action after a scene is generated. | |
| * | |
| * POST /api/generate/tts | |
| */ | |
| import { NextRequest } from 'next/server'; | |
| import { generateTTS } from '@/lib/audio/tts-providers'; | |
| import { resolveTTSApiKey, resolveTTSBaseUrl } from '@/lib/server/provider-config'; | |
| import type { TTSProviderId } from '@/lib/audio/types'; | |
| import { createLogger } from '@/lib/logger'; | |
| import { apiError, apiSuccess } from '@/lib/server/api-response'; | |
| import { validateUrlForSSRF } from '@/lib/server/ssrf-guard'; | |
| const log = createLogger('TTS API'); | |
| export const maxDuration = 30; | |
| export async function POST(req: NextRequest) { | |
| try { | |
| const body = await req.json(); | |
| const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as { | |
| text: string; | |
| audioId: string; | |
| ttsProviderId: TTSProviderId; | |
| ttsVoice: string; | |
| ttsSpeed?: number; | |
| ttsApiKey?: string; | |
| ttsBaseUrl?: string; | |
| }; | |
| // Validate required fields | |
| if (!text || !audioId || !ttsProviderId || !ttsVoice) { | |
| return apiError( | |
| 'MISSING_REQUIRED_FIELD', | |
| 400, | |
| 'Missing required fields: text, audioId, ttsProviderId, ttsVoice', | |
| ); | |
| } | |
| // Reject browser-native TTS — must be handled client-side | |
| if (ttsProviderId === 'browser-native-tts') { | |
| return apiError('INVALID_REQUEST', 400, 'browser-native-tts must be handled client-side'); | |
| } | |
| const clientBaseUrl = ttsBaseUrl || undefined; | |
| if (clientBaseUrl && process.env.NODE_ENV === 'production') { | |
| const ssrfError = validateUrlForSSRF(clientBaseUrl); | |
| if (ssrfError) { | |
| return apiError('INVALID_URL', 403, ssrfError); | |
| } | |
| } | |
| const apiKey = clientBaseUrl | |
| ? ttsApiKey || '' | |
| : resolveTTSApiKey(ttsProviderId, ttsApiKey || undefined); | |
| const baseUrl = clientBaseUrl | |
| ? clientBaseUrl | |
| : resolveTTSBaseUrl(ttsProviderId, ttsBaseUrl || undefined); | |
| // Build TTS config | |
| const config = { | |
| providerId: ttsProviderId, | |
| voice: ttsVoice, | |
| speed: ttsSpeed ?? 1.0, | |
| apiKey, | |
| baseUrl, | |
| }; | |
| log.info( | |
| `Generating TTS: provider=${ttsProviderId}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`, | |
| ); | |
| // Generate audio | |
| const { audio, format } = await generateTTS(config, text); | |
| // Convert to base64 | |
| const base64 = Buffer.from(audio).toString('base64'); | |
| return apiSuccess({ audioId, base64, format }); | |
| } catch (error) { | |
| log.error('TTS generation error:', error); | |
| return apiError( | |
| 'GENERATION_FAILED', | |
| 500, | |
| error instanceof Error ? error.message : String(error), | |
| ); | |
| } | |
| } | |