Spaces:
Running
Running
| import { fileURLToPath } from 'url'; | |
| import { readFileSync, createWriteStream, existsSync } from 'fs'; | |
| import { Plugin } from './plugin.js'; | |
| import path from 'path'; | |
| import fs from 'fs'; | |
/**
 * Plugin that converts word-level JSON caption files into ASS subtitle files
 * in which the word currently being spoken is drawn in a highlight colour.
 */
export class CaptionPlugin extends Plugin {
  /**
   * @param {string} name - Forwarded unchanged to the Plugin base constructor
   * @param {Object} options - Forwarded unchanged to the Plugin base constructor
   *   (presumably exposed by the base class as `this.options`; confirm in plugin.js)
   */
  constructor(name, options) {
    super(name, options);
  }

  /**
   * Convert every JSON caption file referenced by the manuscript transcript to ASS.
   *
   * For each converted transcript item the original caption path is kept in
   * `item._audioCaptionFile` and `item.audioCaptionFile` is pointed at the new
   * `.ass` file. Items that have no caption file, that already reference an
   * `.ass` file, or whose caption file cannot be found on disk are left untouched.
   *
   * @param {Object} originalManuscript - Manuscript whose `transcript` array is updated in place
   * @param {*} jobId - Unused by this plugin
   * @returns {Promise<void>}
   */
  async applyPrerender(originalManuscript, jobId) {
    const transcript = originalManuscript.transcript;
    for (const item of transcript) {
      const captionFile = item.audioCaptionFile;
      // Nothing to convert when the item carries no caption path.
      if (typeof captionFile !== 'string' || captionFile === '') {
        continue;
      }
      // Already an ASS file: no conversion needed.
      if (path.extname(captionFile) === '.ass') {
        continue;
      }

      // Look relative to the working directory first, then fall back to ./public/<basename>.
      let originalCaption = path.join(process.cwd(), captionFile);
      if (!fs.existsSync(originalCaption)) {
        originalCaption = path.join(process.cwd(), 'public', path.basename(captionFile));
      }
      // path.join never returns an empty string, so existence has to be checked explicitly.
      if (!fs.existsSync(originalCaption)) {
        this.log(`Caption file not found, skipping: ${captionFile}`);
        continue;
      }

      await this.generateCaptions({
        ...this.options,
        captionFilePath: originalCaption,
        outputFilePath: toAssPath(originalCaption),
      });

      item._audioCaptionFile = captionFile;
      item.audioCaptionFile = toAssPath(captionFile);
    }
  }

  /**
   * Post-render hook; this plugin does nothing after rendering.
   *
   * @param {Object} originalManuscript - Unused
   * @param {*} jobId - Unused
   * @param {*} outFiles - Unused
   * @returns {Promise<void>}
   */
  async applyPostrender(originalManuscript, jobId, outFiles) {
  }

  /**
   * Generate ASS subtitle file with word highlighting
   * @param {Object} options
   * @param {string} options.captionFilePath - Path to input JSON caption file
   * @param {string} options.outputFilePath - Path to output ASS file
   * @param {string} [options.capitalize='upper'] - Word casing: 'full-upper' (all caps),
   *   'upper' (first letter upper-cased), 'small' (lower case); any other value leaves words as-is
   * @param {number} [options.tiltDegrees=8] - Tilt angle in degrees (alternates between +/-)
   * @param {number} [options.translateY=200] - Distance from bottom in pixels
   * @param {number} [options.widthPercent=80] - Width percentage for text centering (0-100)
   * @param {string} [options.fontName='Impact'] - Font name
   * @param {number} [options.fontSize=72] - Font size
   * @param {number} [options.wordsPerGroup=4] - Number of words per caption group
   * @param {number} [options.videoWidth=1920] - Video width for positioning
   * @param {number} [options.videoHeight=1080] - Video height for positioning
   * @param {string} [options.fontColor='#FFFFFF'] - Hex colour of the non-highlighted words
   * @param {string} [options.fontHighlightColor='#00FF00'] - Hex colour of the current word
   * @returns {Promise<string>} Path to generated ASS file
   * @throws {Error} If a colour is not a valid hex string or the caption file contains no words
   */
  async generateCaptions(options) {
    const {
      captionFilePath,
      outputFilePath,
      tiltDegrees = 8,
      translateY = 200,
      widthPercent = 80,
      fontName = 'Impact',
      fontSize = 72,
      capitalize = 'upper',
      wordsPerGroup = 4,
      videoWidth = 1920,
      videoHeight = 1080,
      fontColor = '#FFFFFF',
      fontHighlightColor = '#00FF00'
    } = options;

    const assFontColor = hexToASSColor(fontColor);
    const assHighlightColor = hexToASSColor(fontHighlightColor);
    // Inline colour overrides: switch to the highlight colour, then back to the base colour.
    const highlightOn = `{\\c${assHighlightColor}&}`;
    const highlightOff = `{\\c${assFontColor}&}`;

    // Read and parse JSON file
    const jsonData = JSON.parse(readFileSync(captionFilePath, 'utf-8'));
    const transcript = jsonData.transcript || '';
    let words = Array.isArray(jsonData.words) ? jsonData.words : [];
    if (words.length === 0) {
      throw new Error('No words found in caption file');
    }

    if (capitalize === 'full-upper') {
      words = words.map(w => ({ ...w, word: w.word.toUpperCase() }));
    } else if (capitalize === 'upper') {
      words = words.map(w => ({ ...w, word: w.word.charAt(0).toUpperCase() + w.word.slice(1) }));
    } else if (capitalize === 'small') {
      words = words.map(w => ({ ...w, word: w.word.toLowerCase() }));
    }

    // Assign sentence indices to words
    words = assignSentenceToWords(words, transcript);

    // Calculate margins for centering within width percentage
    const totalMargin = videoWidth * (1 - widthPercent / 100);
    const sideMargin = Math.floor(totalMargin / 2);

    const header = createASSHeader(
      videoWidth,
      videoHeight,
      fontName,
      fontSize,
      translateY,
      sideMargin,
      sideMargin,
      assFontColor,
      assHighlightColor
    );

    // Explicit bottom-centre anchor; only emitted when there is a side margin.
    const posTag = sideMargin > 0 ? `{\\an2\\pos(${videoWidth / 2},${videoHeight - translateY})}` : '';

    // Build all dialogue lines up front so the output file is only created
    // once every line has been computed successfully.
    const events = [];
    let i = 0;
    let groupIdx = 0;
    while (i < words.length) {
      const currentSentence = words[i].sentence_idx || 0;

      // Collect words for this group (up to wordsPerGroup, same sentence only)
      const wordGroup = [];
      let j = i;
      while (
        j < words.length &&
        wordGroup.length < wordsPerGroup &&
        (words[j].sentence_idx || 0) === currentSentence
      ) {
        wordGroup.push(words[j]);
        j++;
      }
      // Only possible when wordsPerGroup is not a positive number; skip the word.
      if (wordGroup.length === 0) {
        i++;
        continue;
      }

      // Alternate tilt direction from one group to the next
      const currentTilt = groupIdx % 2 === 0 ? tiltDegrees : -tiltDegrees;
      const linePrefix = `{\\frz${currentTilt}}${posTag}`;

      // One event per word: the whole group is shown with that word highlighted.
      for (let wordIdx = 0; wordIdx < wordGroup.length; wordIdx++) {
        const wordObj = wordGroup[wordIdx];
        const wordStart = wordObj.start;
        // Each event runs until the next word starts (the last one until its own end),
        // so there are no gaps between the events of a group.
        const wordEnd = wordIdx < wordGroup.length - 1 ? wordGroup[wordIdx + 1].start : wordObj.end;

        const captionText = wordGroup
          .map((w, idx) => (idx === wordIdx ? `${highlightOn}${w.word}${highlightOff}` : w.word))
          .join(' ');

        events.push(
          `Dialogue: 0,${formatTimestampASS(wordStart)},${formatTimestampASS(wordEnd)},Default,,0,0,0,,${linePrefix}${captionText}\n`
        );
      }

      i = j;
      groupIdx++;
    }

    return new Promise((resolve, reject) => {
      const output = createWriteStream(outputFilePath);
      // Attach listeners before writing so no stream error can go unobserved.
      output.on('error', reject);
      output.on('finish', () => {
        this.log(`Generated ${path.basename(outputFilePath)} captions`);
        resolve(outputFilePath);
      });
      output.end(header + events.join(''));
    });
  }
}

/**
 * Replace the extension of a path (or append one if it has none) with `.ass`.
 * Only the trailing extension is touched, never a `.json` occurring elsewhere in the path.
 * @param {string} filePath
 * @returns {string}
 */
function toAssPath(filePath) {
  const stem = filePath.slice(0, filePath.length - path.extname(filePath).length);
  return `${stem}.ass`;
}
/**
 * Format seconds to ASS timestamp format (H:MM:SS.cc)
 *
 * The value is rounded to whole centiseconds before it is split into fields,
 * so rounding carries into the seconds/minutes/hours instead of producing an
 * out-of-range seconds field such as "60.00". Negative or non-numeric input is
 * formatted as 0:00:00.00.
 *
 * @param {number} seconds - Time offset in seconds
 * @returns {string} Timestamp such as "0:01:02.50"
 */
function formatTimestampASS(seconds) {
  const value = Number(seconds);
  const totalCentis = Number.isFinite(value) ? Math.max(0, Math.round(value * 100)) : 0;

  const centis = totalCentis % 100;
  const totalSeconds = Math.floor(totalCentis / 100);
  const secs = totalSeconds % 60;
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const hours = Math.floor(totalSeconds / 3600);

  const pad2 = (n) => String(n).padStart(2, '0');
  return `${hours}:${pad2(minutes)}:${pad2(secs)}.${pad2(centis)}`;
}
/**
 * Split transcript into sentences
 *
 * A sentence ends at a run of '.', '!' or '?' that is followed by whitespace;
 * the punctuation stays attached to its sentence. Surrounding whitespace of the
 * transcript is ignored and empty sentences are never returned.
 *
 * @param {string} transcript
 * @returns {string[]} Sentences in order; empty array for an empty/blank transcript
 */
function splitIntoSentences(transcript) {
  // The capture group makes split() return [text, punct, text, punct, ..., text].
  const parts = transcript.trim().split(/([.!?]+)\s+/);
  const sentences = [];
  for (let i = 0; i < parts.length; i += 2) {
    // The final text part has no terminator entry after it.
    const terminator = i + 1 < parts.length ? parts[i + 1] : '';
    const sentence = parts[i] + terminator;
    if (sentence !== '') {
      sentences.push(sentence);
    }
  }
  return sentences;
}
/**
 * Assign sentence index to each word
 *
 * The transcript is split into sentences and each sentence claims as many
 * consecutive entries of `words` as it has whitespace-separated tokens. The
 * claimed entries get a `sentence_idx` property (the objects are modified in
 * place). Entries beyond the transcript's token count are left without one.
 *
 * @param {Array} words - Word objects, in spoken order
 * @param {string} transcript - Full transcript text
 * @returns {Array} The same `words` array
 */
function assignSentenceToWords(words, transcript) {
  const sentences = splitIntoSentences(transcript);
  let wordIdx = 0;
  sentences.forEach((sentence, sentIdx) => {
    // Ignore empty tokens (from blank or whitespace-padded sentences) so they
    // do not consume a word slot and shift every later sentence boundary.
    const tokenCount = sentence.split(/\s+/).filter(Boolean).length;
    for (let k = 0; k < tokenCount && wordIdx < words.length; k++) {
      words[wordIdx].sentence_idx = sentIdx;
      wordIdx++;
    }
  });
  return words;
}
/**
 * Create ASS file header with styles
 *
 * Produces the [Script Info] section, a [V4+ Styles] section with two styles
 * ("Default" and "Highlight") that differ only in primary colour, and the
 * [Events] section header with its Format line. The result ends with a newline
 * so Dialogue lines can be appended directly.
 *
 * @param {number} videoWidth - Written as PlayResX
 * @param {number} videoHeight - Written as PlayResY
 * @param {string} fontName - Font name for both styles
 * @param {number} fontSize - Font size for both styles
 * @param {number} marginV - Vertical margin for both styles
 * @param {number} marginL - Left margin for both styles
 * @param {number} marginR - Right margin for both styles
 * @param {string} primaryColor - Primary colour of the "Default" style (ASS &HAABBGGRR form)
 * @param {string} highlightColor - Primary colour of the "Highlight" style (ASS &HAABBGGRR form)
 * @returns {string}
 */
function createASSHeader(
  videoWidth = 1920,
  videoHeight = 1080,
  fontName = 'Impact',
  fontSize = 72,
  marginV = 200,
  marginL = 10,
  marginR = 10,
  primaryColor = '&H00FFFFFF',
  highlightColor = '&H0000FF00'
) {
  // Everything after the primary colour is identical for both styles.
  const sharedStyleFields = `&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1`;
  const styleLine = (styleName, colour) =>
    `Style: ${styleName},${fontName},${fontSize},${colour},${sharedStyleFields}`;

  const headerLines = [
    '[Script Info]',
    'Title: Word-by-Word Captions',
    'ScriptType: v4.00+',
    'WrapStyle: 0',
    `PlayResX: ${videoWidth}`,
    `PlayResY: ${videoHeight}`,
    'ScaledBorderAndShadow: yes',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    styleLine('Default', primaryColor),
    styleLine('Highlight', highlightColor),
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ];

  // Trailing newline so the first Dialogue line starts on its own line.
  return `${headerLines.join('\n')}\n`;
}
/**
 * Convert a hex colour to an ASS colour literal.
 *
 * ASS stores colours as &HAABBGGRR, so the red and blue channels are swapped
 * relative to the #RRGGBB input; the alpha byte is always 00.
 *
 * @param {string} hexValue - "#RRGGBB" or "#RGB" (the leading '#' is optional,
 *   surrounding whitespace is ignored)
 * @returns {string} Upper-case ASS colour such as "&H0000FF00"
 * @throws {Error} If the value is not a string or not a valid hex colour
 */
function hexToASSColor(hexValue) {
  if (typeof hexValue !== 'string') {
    throw new Error('fontColor values must be hex strings like #RRGGBB');
  }
  // '#' is only accepted as a prefix; a '#' anywhere else makes the value invalid.
  const match = /^#?([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$/.exec(hexValue.trim());
  if (match === null) {
    throw new Error(`Invalid hex color provided: ${hexValue}`);
  }
  let digits = match[1];
  if (digits.length === 3) {
    // Expand shorthand: "abc" -> "aabbcc".
    digits = digits.split('').map((ch) => ch + ch).join('');
  }
  const r = digits.slice(0, 2);
  const g = digits.slice(2, 4);
  const b = digits.slice(4, 6);
  return `&H00${b}${g}${r}`.toUpperCase();
}