Nitish kumar
Upload folder using huggingface_hub
c20f20c verified
/**
* Single TTS Generation API
*
* Generates TTS audio for a single text string and returns base64-encoded audio.
* Called by the client in parallel for each speech action after a scene is generated.
*
* POST /api/generate/tts
*/
import { NextRequest } from 'next/server';
import { generateTTS } from '@/lib/audio/tts-providers';
import { resolveTTSApiKey, resolveTTSBaseUrl } from '@/lib/server/provider-config';
import type { TTSProviderId } from '@/lib/audio/types';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import { validateUrlForSSRF } from '@/lib/server/ssrf-guard';
const log = createLogger('TTS API');
export const maxDuration = 30;
export async function POST(req: NextRequest) {
try {
const body = await req.json();
const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};
// Validate required fields
if (!text || !audioId || !ttsProviderId || !ttsVoice) {
return apiError(
'MISSING_REQUIRED_FIELD',
400,
'Missing required fields: text, audioId, ttsProviderId, ttsVoice',
);
}
// Reject browser-native TTS — must be handled client-side
if (ttsProviderId === 'browser-native-tts') {
return apiError('INVALID_REQUEST', 400, 'browser-native-tts must be handled client-side');
}
const clientBaseUrl = ttsBaseUrl || undefined;
if (clientBaseUrl && process.env.NODE_ENV === 'production') {
const ssrfError = validateUrlForSSRF(clientBaseUrl);
if (ssrfError) {
return apiError('INVALID_URL', 403, ssrfError);
}
}
const apiKey = clientBaseUrl
? ttsApiKey || ''
: resolveTTSApiKey(ttsProviderId, ttsApiKey || undefined);
const baseUrl = clientBaseUrl
? clientBaseUrl
: resolveTTSBaseUrl(ttsProviderId, ttsBaseUrl || undefined);
// Build TTS config
const config = {
providerId: ttsProviderId,
voice: ttsVoice,
speed: ttsSpeed ?? 1.0,
apiKey,
baseUrl,
};
log.info(
`Generating TTS: provider=${ttsProviderId}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`,
);
// Generate audio
const { audio, format } = await generateTTS(config, text);
// Convert to base64
const base64 = Buffer.from(audio).toString('base64');
return apiSuccess({ audioId, base64, format });
} catch (error) {
log.error('TTS generation error:', error);
return apiError(
'GENERATION_FAILED',
500,
error instanceof Error ? error.message : String(error),
);
}
}