import { fileURLToPath } from 'url';
import { readFileSync, createWriteStream, existsSync } from 'fs';
import { Plugin } from './plugin.js';
import path from 'path';
import fs from 'fs';

/**
 * Plugin that turns JSON word-timing caption files into ASS subtitle files
 * in which the currently spoken word is highlighted.
 */
export class CaptionPlugin extends Plugin {
  constructor(name, options) {
    super(name, options);
  }

  /**
   * Generate an .ass caption file for every transcript item whose caption
   * file is not already an .ass file, then repoint the item at it.
   *
   * For each converted item the original value of `audioCaptionFile` is kept
   * in `item._audioCaptionFile` and `item.audioCaptionFile` is rewritten to
   * carry the `.ass` extension. Items are mutated in place.
   *
   * @param {Object} originalManuscript - Manuscript whose `transcript` array is processed
   * @param {*} jobId - Not used by this method
   * @returns {Promise<void>}
   */
  async applyPrerender(originalManuscript, jobId) {
    const transcript = originalManuscript.transcript;

    for (const item of transcript) {
      const captionFileName = path.basename(item.audioCaptionFile);
      if (path.extname(captionFileName) === '.ass') {
        continue;
      }

      // Look relative to the working directory first, then fall back to ./public.
      let originalCaption = path.join(process.cwd(), item.audioCaptionFile);
      if (!existsSync(originalCaption)) {
        originalCaption = path.join(process.cwd(), 'public', captionFileName);
      }
      if (!existsSync(originalCaption)) {
        this.log(`Caption file not found for ${item.audioCaptionFile}, skipping`);
        continue;
      }

      // Swap only the real extension so the input file can never be chosen as
      // the output path (which would overwrite the source captions).
      const outputCaptionFile = replaceExtension(originalCaption, '.ass');

      await this.generateCaptions({
        ...this.options,
        captionFilePath: originalCaption,
        outputFilePath: outputCaptionFile,
      });

      item._audioCaptionFile = item.audioCaptionFile;
      item.audioCaptionFile = replaceExtension(item.audioCaptionFile, '.ass');
    }
  }

  /**
   * Post-render hook; intentionally does nothing for this plugin.
   */
  async applyPostrender(originalManuscript, jobId, outFiles) {
  }

  /**
   * Generate ASS subtitle file with word highlighting
   * @param {Object} options
   * @param {string} options.captionFilePath - Path to input JSON caption file
   * @param {string} options.outputFilePath - Path to output ASS file
   * @param {string} [options.capitalize='upper'] - Word casing: 'upper' (first letter
   *   upper-cased), 'full-upper', 'small' (lower-cased); any other value leaves words as-is
   * @param {number} [options.tiltDegrees=8] - Tilt angle in degrees (alternates between +/-)
   * @param {number} [options.translateY=200] - Distance from bottom in pixels
   * @param {number} [options.widthPercent=80] - Width percentage for text centering (0-100)
   * @param {string} [options.fontName='Impact'] - Font name
   * @param {number} [options.fontSize=72] - Font size
   * @param {number} [options.wordsPerGroup=4] - Number of words per caption group
   * @param {number} [options.videoWidth=1920] - Video width for positioning
   * @param {number} [options.videoHeight=1080] - Video height for positioning
   * @param {string} [options.fontColor='#FFFFFF'] - Base text color as #RRGGBB
   * @param {string} [options.fontHighlightColor='#00FF00'] - Highlighted word color as #RRGGBB
   * @returns {Promise<string>} Path to generated ASS file
   * @throws {Error} If the caption file contains no words or a color is not valid hex
   */
  async generateCaptions(options) {
    const {
      captionFilePath,
      outputFilePath,
      tiltDegrees = 8,
      translateY = 200,
      widthPercent = 80,
      fontName = 'Impact',
      fontSize = 72,
      capitalize = 'upper',
      wordsPerGroup = 4,
      videoWidth = 1920,
      videoHeight = 1080,
      fontColor = '#FFFFFF',
      fontHighlightColor = '#00FF00',
    } = options;

    const assFontColor = hexToASSColor(fontColor);
    const assHighlightColor = hexToASSColor(fontHighlightColor);
    // Inline \c override tags take the color with a trailing '&'.
    const assHighlightColorInline = `${assHighlightColor}&`;
    const assFontColorInline = `${assFontColor}&`;

    // Read and parse JSON file
    const jsonData = JSON.parse(readFileSync(captionFilePath, 'utf-8'));
    const transcript = jsonData.transcript || '';
    const rawWords = jsonData.words || [];
    if (rawWords.length === 0) {
      throw new Error('No words found in caption file');
    }

    // Apply casing, then tag each word with the sentence it belongs to.
    const words = assignSentenceToWords(capitalizeWords(rawWords, capitalize), transcript);

    // Calculate margins for centering within width percentage
    const totalMargin = videoWidth * (1 - widthPercent / 100);
    const sideMargin = Math.floor(totalMargin / 2);

    // Build the whole file content before touching the output path, so a
    // failure while building never leaves a half-written file behind.
    const chunks = [
      createASSHeader(
        videoWidth,
        videoHeight,
        fontName,
        fontSize,
        translateY,
        sideMargin,
        sideMargin,
        assFontColor,
        assHighlightColor
      ),
    ];

    // Explicit position is only emitted when there is a side margin.
    const posTag = sideMargin > 0
      ? `{\\an2\\pos(${videoWidth / 2},${videoHeight - translateY})}`
      : '';

    groupWordsBySentence(words, wordsPerGroup).forEach((wordGroup, groupIdx) => {
      // Alternate tilt
      const currentTilt = groupIdx % 2 === 0 ? tiltDegrees : -tiltDegrees;
      const tiltTag = `{\\frz${currentTilt}}`;

      wordGroup.forEach((wordObj, wordIdx) => {
        // Each event runs from this word's start to the next word's start
        // (or this word's end for the last word), so the group stays on
        // screen without gaps between words.
        const wordStart = wordObj.start;
        const wordEnd = wordIdx < wordGroup.length - 1
          ? wordGroup[wordIdx + 1].start
          : wordObj.end;

        // Highlight the current word, then switch back to the base color.
        const captionParts = wordGroup.map((w, idx) => (
          idx === wordIdx
            ? `{\\c${assHighlightColorInline}}${w.word}{\\c${assFontColorInline}}`
            : w.word
        ));
        const captionText = tiltTag + posTag + captionParts.join(' ');

        chunks.push(
          `Dialogue: 0,${formatTimestampASS(wordStart)},${formatTimestampASS(wordEnd)},Default,,0,0,0,,${captionText}\n`
        );
      });
    });

    return new Promise((resolve, reject) => {
      const output = createWriteStream(outputFilePath);
      output.on('error', reject);
      output.on('finish', () => {
        this.log(`Generated ${path.basename(outputFilePath)} captions`);
        resolve(outputFilePath);
      });
      for (const chunk of chunks) {
        output.write(chunk);
      }
      output.end();
    });
  }
}

/**
 * Replace the extension of a path (as reported by path.extname) with another one.
 * A path without an extension simply gets the new extension appended.
 * @param {string} filePath
 * @param {string} newExt - New extension including the leading dot
 * @returns {string}
 */
function replaceExtension(filePath, newExt) {
  const ext = path.extname(filePath);
  const stem = ext ? filePath.slice(0, -ext.length) : filePath;
  return `${stem}${newExt}`;
}

/**
 * Return the words with their `word` text re-cased according to `mode`.
 * Unknown modes (e.g. 'none') return the input array unchanged.
 * @param {Array} words
 * @param {string} mode - 'full-upper', 'upper' or 'small'
 * @returns {Array}
 */
function capitalizeWords(words, mode) {
  switch (mode) {
    case 'full-upper':
      return words.map((w) => ({ ...w, word: w.word.toUpperCase() }));
    case 'upper':
      return words.map((w) => ({ ...w, word: w.word.charAt(0).toUpperCase() + w.word.slice(1) }));
    case 'small':
      return words.map((w) => ({ ...w, word: w.word.toLowerCase() }));
    default:
      return words;
  }
}

/**
 * Split words into consecutive groups of at most `wordsPerGroup` words,
 * never mixing words from different sentences in one group.
 * A missing `sentence_idx` is treated as sentence 0.
 * @param {Array} words
 * @param {number} wordsPerGroup
 * @returns {Array<Array>}
 */
function groupWordsBySentence(words, wordsPerGroup) {
  const groups = [];
  let start = 0;
  while (start < words.length) {
    const currentSentence = words[start].sentence_idx || 0;
    let end = start;
    while (
      end < words.length &&
      end - start < wordsPerGroup &&
      (words[end].sentence_idx || 0) === currentSentence
    ) {
      end++;
    }
    if (end === start) {
      // Only reachable when wordsPerGroup is not positive: skip the word.
      start++;
      continue;
    }
    groups.push(words.slice(start, end));
    start = end;
  }
  return groups;
}

/**
 * Format seconds to ASS timestamp format (H:MM:SS.cc)
 *
 * The value is rounded to whole centiseconds before being split into fields,
 * so a value such as 59.999 becomes 0:01:00.00 rather than 0:00:60.00.
 * Negative values are clamped to zero.
 * @param {number} seconds
 * @returns {string}
 */
function formatTimestampASS(seconds) {
  const pad2 = (value) => String(value).padStart(2, '0');
  const totalCentis = Math.max(0, Math.round(seconds * 100));
  const centis = totalCentis % 100;
  const totalSeconds = Math.floor(totalCentis / 100);
  const secs = totalSeconds % 60;
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const hours = Math.floor(totalSeconds / 3600);
  return `${hours}:${pad2(minutes)}:${pad2(secs)}.${pad2(centis)}`;
}

/**
 * Split transcript into sentences
 *
 * A sentence ends at a run of '.', '!' or '?' followed by whitespace; the
 * punctuation stays attached to its sentence. Whitespace-only pieces (for
 * example after trailing punctuation plus whitespace) are dropped.
 * @param {string} transcript
 * @returns {string[]}
 */
function splitIntoSentences(transcript) {
  // With one capture group the result alternates: text, punctuation, text, ...
  const parts = transcript.split(/([.!?]+)\s+/);
  const sentences = [];
  for (let i = 0; i < parts.length; i += 2) {
    const punctuation = i + 1 < parts.length ? parts[i + 1] : '';
    const sentence = parts[i] + punctuation;
    if (sentence.trim() !== '') {
      sentences.push(sentence);
    }
  }
  return sentences;
}

/**
 * Assign sentence index to each word
 *
 * Words are matched to sentences purely by position: each sentence claims as
 * many words as it has whitespace-separated tokens. The word objects are
 * mutated in place (`sentence_idx`). Words left over once the transcript's
 * tokens run out are not touched.
 * @param {Array} words
 * @param {string} transcript
 * @returns {Array} The same `words` array
 */
function assignSentenceToWords(words, transcript) {
  let wordIdx = 0;
  splitIntoSentences(transcript).forEach((sentence, sentIdx) => {
    // Ignore empty tokens caused by leading/trailing whitespace so they do
    // not consume a word and shift every following sentence boundary.
    const tokenCount = sentence.split(/\s+/).filter(Boolean).length;
    for (let k = 0; k < tokenCount && wordIdx < words.length; k++) {
      words[wordIdx].sentence_idx = sentIdx;
      wordIdx++;
    }
  });
  return words;
}

/**
 * Create ASS file header with styles
 * @param {number} videoWidth - Written as PlayResX
 * @param {number} videoHeight - Written as PlayResY
 * @param {string} fontName
 * @param {number} fontSize
 * @param {number} marginV - Vertical margin for both styles
 * @param {number} marginL - Left margin for both styles
 * @param {number} marginR - Right margin for both styles
 * @param {string} primaryColor - ASS color for the Default style
 * @param {string} highlightColor - ASS color for the Highlight style
 * @returns {string}
 */
function createASSHeader(
  videoWidth = 1920,
  videoHeight = 1080,
  fontName = 'Impact',
  fontSize = 72,
  marginV = 200,
  marginL = 10,
  marginR = 10,
  primaryColor = '&H00FFFFFF',
  highlightColor = '&H0000FF00'
) {
  return `[Script Info]
Title: Word-by-Word Captions
ScriptType: v4.00+
WrapStyle: 0
PlayResX: ${videoWidth}
PlayResY: ${videoHeight}
ScaledBorderAndShadow: yes

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,${fontName},${fontSize},${primaryColor},&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1
Style: Highlight,${fontName},${fontSize},${highlightColor},&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`;
}

/**
 * Convert a #RRGGBB hex color into an ASS color string (&H00BBGGRR, upper-cased).
 * @param {string} hexValue - Six hex digits, optionally with a '#'
 * @returns {string}
 * @throws {Error} If the value is not a string or not six hex digits
 */
function hexToASSColor(hexValue) {
  if (typeof hexValue !== 'string') {
    throw new Error('fontColor values must be hex strings like #RRGGBB');
  }

  const normalized = hexValue.trim().replace('#', '');
  if (!/^[0-9a-fA-F]{6}$/.test(normalized)) {
    throw new Error(`Invalid hex color provided: ${hexValue}`);
  }

  const red = normalized.slice(0, 2);
  const green = normalized.slice(2, 4);
  const blue = normalized.slice(4, 6);
  // ASS stores channels in blue-green-red order behind a leading alpha byte.
  return `&H00${blue}${green}${red}`.toUpperCase();
}