import { exec } from 'child_process';
import { promisify } from 'util';
import { writeFile, readFile, unlink } from 'fs/promises';
import { join } from 'path';
import { randomBytes } from 'crypto';

const execAsync = promisify(exec);

/** Lower-case extensions (with leading dot) that are transcoded to MP3. */
const CONVERTIBLE_EXTENSIONS = ['.ogg', '.opus', '.oga'] as const;

/**
 * Returns the text after the last '.' in `filename`, or the whole name when it
 * contains no dot. Used as the reported format whenever no conversion happens.
 */
function formatFromFilename(filename: string): string {
  // split() always yields at least one element, so the fallback is only there
  // to satisfy the type checker without a non-null assertion.
  return filename.split('.').pop() ?? filename;
}

/**
 * Converts an audio buffer to MP3 using local FFmpeg if the file is OGG/OPUS.
 * This ensures compatibility with OpenAI Whisper, which frequently rejects
 * native WhatsApp OPUS files.
 *
 * Best-effort: if ffmpeg fails or isn't installed, the original buffer is
 * returned unchanged so that transcription can still be attempted.
 *
 * @param inputBuffer - Raw audio bytes.
 * @param filename - Original file name; only its extension decides whether a
 *   conversion is attempted. It is treated as untrusted and never reaches the
 *   shell command or the temp-file path.
 * @returns The MP3 bytes with format `'mp3'` on success; otherwise the original
 *   buffer together with the extension taken from `filename`.
 */
export async function convertToMp3IfNeeded(
  inputBuffer: Buffer,
  filename: string,
): Promise<{ buffer: Buffer; format: string }> {
  // We only strictly convert OGG or OPUS files (which are typical for WhatsApp).
  // If it's already an MP4/AAC or M4A, Whisper usually accepts it fine.
  const lowerName = filename.toLowerCase();
  const matchedExtension = CONVERTIBLE_EXTENSIONS.find((ext) => lowerName.endsWith(ext));
  if (matchedExtension === undefined) {
    return { buffer: inputBuffer, format: formatFromFilename(filename) };
  }

  // Temp paths are built only from a random hex id and a whitelisted extension.
  // The caller-supplied filename is deliberately NOT embedded: it could contain
  // quotes or shell metacharacters (command injection through exec) or path
  // separators (writing outside /tmp).
  const tempId = randomBytes(8).toString('hex');
  const inputPath = join('/tmp', `in_${tempId}${matchedExtension}`);
  const outputPath = join('/tmp', `out_${tempId}.mp3`);

  try {
    console.log(`[FFMPEG] Starting conversion for ${filename}...`);

    // Write the inbound buffer to a temp file
    await writeFile(inputPath, inputBuffer);

    // Run FFmpeg to convert it to a mono 64k MP3
    // -y overwrites without prompting
    // -i specifies input file
    // -vn disables video just in case
    // -ar sets audio sample rate
    // -ac sets channels to mono (perfect for voice)
    // -b:a sets audio bitrate to 64k (sufficient for voice, saves bandwidth)
    await execAsync(`ffmpeg -y -i "${inputPath}" -vn -ar 44100 -ac 1 -b:a 64k "${outputPath}"`);

    // Read the converted file back into a buffer
    const mp3Buffer = await readFile(outputPath);

    console.log(`[FFMPEG] ✅ Successfully converted ${filename} to MP3.`);
    return { buffer: mp3Buffer, format: 'mp3' };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[FFMPEG] ⚠️ Conversion failed for ${filename}. Proceeding with original buffer. Error: ${reason}`);

    // If FFMPEG isn't installed or fails, we return the original buffer
    return { buffer: inputBuffer, format: formatFromFilename(filename) };
  } finally {
    // Cleanup temp files asynchronously; a missing file is not an error here
    // (e.g. ffmpeg never produced the output), so rejections are ignored.
    void unlink(inputPath).catch(() => {});
    void unlink(outputPath).catch(() => {});
  }
}