remote-rdr / server-plugins /generate-captions.js
shiveshnavin's picture
Fix concat cleanup
3807db1
import { fileURLToPath } from 'url';
import { readFileSync, createWriteStream, existsSync } from 'fs';
import { Plugin } from './plugin.js';
import path from 'path';
import fs from 'fs';
/**
 * Plugin that converts word-level JSON caption files referenced by a
 * manuscript into ASS subtitle files with per-word highlighting.
 */
export class CaptionPlugin extends Plugin {
  constructor(name, options) {
    super(name, options);
  }

  /**
   * For every transcript item whose caption file is not already an `.ass`
   * file, generate an ASS file next to the JSON caption file and point the
   * item at it. The previous value is preserved in `item._audioCaptionFile`.
   *
   * Items without a caption file, or whose caption file cannot be found
   * (neither relative to the working directory nor under `public/`), are
   * skipped.
   *
   * @param {Object} originalManuscript - Manuscript whose `transcript` items are updated in place
   * @param {string} jobId - Unused here
   * @returns {Promise<void>}
   */
  async applyPrerender(originalManuscript, jobId) {
    const transcript = originalManuscript.transcript || [];
    for (const item of transcript) {
      if (!item || typeof item.audioCaptionFile !== 'string' || item.audioCaptionFile === '') {
        continue; // Nothing to convert for this item
      }
      const captionFileName = path.basename(item.audioCaptionFile);
      if (path.extname(captionFileName) === '.ass') {
        continue; // Already an ASS file
      }

      // Look relative to the working directory first, then fall back to public/
      let originalCaption = path.join(process.cwd(), item.audioCaptionFile);
      if (!fs.existsSync(originalCaption)) {
        originalCaption = path.join(process.cwd(), 'public', captionFileName);
      }
      if (!fs.existsSync(originalCaption)) {
        this.log(`Caption file not found, skipping: ${item.audioCaptionFile}`);
        continue;
      }

      // Swap only the trailing extension so the output can never collide with
      // the input file and directory names containing ".json" stay intact.
      const outputCaptionFile = replaceExtension(originalCaption, '.ass');
      await this.generateCaptions({
        ...this.options,
        captionFilePath: originalCaption,
        outputFilePath: outputCaptionFile,
      });

      item._audioCaptionFile = item.audioCaptionFile;
      item.audioCaptionFile = replaceExtension(item.audioCaptionFile, '.ass');
    }
  }

  /**
   * No post-render work is needed for captions.
   */
  async applyPostrender(originalManuscript, jobId, outFiles) {
  }

  /**
   * Generate ASS subtitle file with word highlighting
   * @param {Object} options
   * @param {string} options.captionFilePath - Path to input JSON caption file (reads `words` and optional `transcript`)
   * @param {string} options.outputFilePath - Path to output ASS file
   * @param {string} [options.capitalize='upper'] - 'upper' (capitalize first letter), 'full-upper', 'small' (lowercase); any other value leaves words unchanged
   * @param {number} [options.tiltDegrees=8] - Tilt angle in degrees (alternates between +/-)
   * @param {number} [options.translateY=200] - Distance from bottom in pixels
   * @param {number} [options.widthPercent=80] - Width percentage for text centering (0-100)
   * @param {string} [options.fontName='Impact'] - Font name
   * @param {number} [options.fontSize=72] - Font size
   * @param {number} [options.wordsPerGroup=4] - Number of words per caption group
   * @param {number} [options.videoWidth=1920] - Video width for positioning
   * @param {number} [options.videoHeight=1080] - Video height for positioning
   * @param {string} [options.fontColor='#FFFFFF'] - Hex colour of non-highlighted words
   * @param {string} [options.fontHighlightColor='#00FF00'] - Hex colour of the currently spoken word
   * @returns {Promise<string>} Path to generated ASS file
   * @throws {Error} When the caption file contains no words or a colour is invalid
   */
  async generateCaptions(options) {
    const {
      captionFilePath,
      outputFilePath,
      tiltDegrees = 8,
      translateY = 200,
      widthPercent = 80,
      fontName = 'Impact',
      fontSize = 72,
      capitalize = 'upper',
      wordsPerGroup = 4,
      videoWidth = 1920,
      videoHeight = 1080,
      fontColor = '#FFFFFF',
      fontHighlightColor = '#00FF00'
    } = options;

    const assFontColor = hexToASSColor(fontColor);
    const assHighlightColor = hexToASSColor(fontHighlightColor);
    // Inline colour override tags need a trailing '&'
    const assHighlightColorInline = `${assHighlightColor}&`;
    const assFontColorInline = `${assFontColor}&`;

    // Read and parse JSON file
    const jsonData = JSON.parse(readFileSync(captionFilePath, 'utf-8'));
    const transcript = jsonData.transcript || '';
    let words = jsonData.words || [];
    if (words.length === 0) {
      throw new Error('No words found in caption file');
    }

    if (capitalize === 'full-upper') {
      words = words.map(w => ({ ...w, word: w.word.toUpperCase() }));
    } else if (capitalize === 'upper') {
      words = words.map(w => ({ ...w, word: w.word.charAt(0).toUpperCase() + w.word.slice(1) }));
    } else if (capitalize === 'small') {
      words = words.map(w => ({ ...w, word: w.word.toLowerCase() }));
    }

    // Assign sentence indices to words so groups never straddle sentences
    words = assignSentenceToWords(words, transcript);

    // Calculate margins for centering within width percentage
    const totalMargin = videoWidth * (1 - widthPercent / 100);
    const sideMargin = Math.floor(totalMargin / 2);

    // Build the whole file in memory first: if anything below throws, no
    // partially written output file is left behind.
    const chunks = [
      createASSHeader(
        videoWidth,
        videoHeight,
        fontName,
        fontSize,
        translateY,
        sideMargin,
        sideMargin,
        assFontColor,
        assHighlightColor
      )
    ];

    // Explicit bottom-centre placement, only used when there is a side margin
    const posTag = sideMargin > 0 ? `{\\an2\\pos(${videoWidth / 2},${videoHeight - translateY})}` : '';

    // Process words in groups respecting sentence boundaries
    let i = 0;
    let groupIdx = 0;
    while (i < words.length) {
      const currentSentence = words[i].sentence_idx || 0;

      // Collect words for this group (up to wordsPerGroup, same sentence only)
      const wordGroup = [];
      let j = i;
      while (
        j < words.length &&
        wordGroup.length < wordsPerGroup &&
        (words[j].sentence_idx || 0) === currentSentence
      ) {
        wordGroup.push(words[j]);
        j++;
      }
      if (wordGroup.length === 0) {
        i++;
        continue;
      }

      // Alternate tilt direction from one group to the next
      const currentTilt = groupIdx % 2 === 0 ? tiltDegrees : -tiltDegrees;
      const tiltTag = `{\\frz${currentTilt}}`;

      // One event per word: the whole group is shown with that word highlighted
      for (let wordIdx = 0; wordIdx < wordGroup.length; wordIdx++) {
        const wordObj = wordGroup[wordIdx];
        const wordStart = wordObj.start;
        // Run until the next word starts (or this word ends, for the last
        // word) so there is no gap between consecutive events of a group.
        const wordEnd = wordIdx < wordGroup.length - 1 ? wordGroup[wordIdx + 1].start : wordObj.end;

        const captionParts = wordGroup.map((w, idx) => (
          idx === wordIdx
            // Current word: switch to the highlight colour, then back
            ? `{\\c${assHighlightColorInline}}${w.word}{\\c${assFontColorInline}}`
            : w.word
        ));
        const captionText = tiltTag + posTag + captionParts.join(' ');

        chunks.push(`Dialogue: 0,${formatTimestampASS(wordStart)},${formatTimestampASS(wordEnd)},Default,,0,0,0,,${captionText}\n`);
      }

      i = j;
      groupIdx++;
    }

    await fs.promises.writeFile(outputFilePath, chunks.join(''));
    this.log(`Generated ${path.basename(outputFilePath)} captions`);
    return outputFilePath;
  }
}

/**
 * Replace the trailing file extension of a path (or append one if the path
 * has none), leaving directory names and separators untouched.
 * @param {string} filePath
 * @param {string} newExtension - Extension including the leading dot
 * @returns {string}
 */
function replaceExtension(filePath, newExtension) {
  const currentExtension = path.extname(filePath);
  return filePath.slice(0, filePath.length - currentExtension.length) + newExtension;
}
/**
 * Format seconds to ASS timestamp format (H:MM:SS.cc)
 *
 * The value is rounded to whole centiseconds once, up front, and then split
 * into fields. Rounding only the seconds field could produce "60.00" (for
 * example 59.996 became "0:00:60.00") instead of carrying into the minutes.
 * Negative inputs are clamped to zero.
 *
 * @param {number} seconds
 * @returns {string}
 */
function formatTimestampASS(seconds) {
  const totalCentis = Math.max(0, Math.round(seconds * 100));
  const centis = totalCentis % 100;
  const totalSeconds = Math.floor(totalCentis / 100);
  const secs = totalSeconds % 60;
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const hours = Math.floor(totalSeconds / 3600);
  const pad2 = (value) => String(value).padStart(2, '0');
  return `${hours}:${pad2(minutes)}:${pad2(secs)}.${pad2(centis)}`;
}
/**
 * Split transcript into sentences
 *
 * A sentence ends at a run of '.', '!' or '?' that is followed by
 * whitespace; the punctuation stays attached to its sentence. Surrounding
 * whitespace is ignored and empty fragments are dropped, so an empty or
 * whitespace-only transcript yields an empty array. Non-string input also
 * yields an empty array.
 *
 * @param {string} transcript
 * @returns {string[]}
 */
function splitIntoSentences(transcript) {
  if (typeof transcript !== 'string') {
    return [];
  }
  // The capturing group keeps the punctuation in the result, so `parts`
  // alternates: text, punctuation, text, punctuation, ..., text.
  const parts = transcript.trim().split(/([.!?]+)\s+/);
  const sentences = [];
  for (let i = 0; i < parts.length; i += 2) {
    const sentence = parts[i] + (parts[i + 1] || '');
    if (sentence !== '') {
      sentences.push(sentence);
    }
  }
  return sentences;
}
/**
 * Assign sentence index to each word
 *
 * Words are matched to sentences purely by position: each sentence claims as
 * many consecutive words as it has whitespace-separated tokens. Empty tokens
 * (from leading/trailing whitespace or an empty sentence) are not counted,
 * otherwise every following sentence boundary would be shifted by one word.
 * The `words` array is modified in place; words beyond the transcript's
 * token count are left without a `sentence_idx`.
 *
 * @param {Array} words
 * @param {string} transcript
 * @returns {Array} The same `words` array
 */
function assignSentenceToWords(words, transcript) {
  const sentences = splitIntoSentences(transcript);
  let wordIdx = 0;
  sentences.forEach((sentence, sentIdx) => {
    const tokenCount = sentence.split(/\s+/).filter(Boolean).length;
    for (let k = 0; k < tokenCount && wordIdx < words.length; k++) {
      words[wordIdx].sentence_idx = sentIdx;
      wordIdx++;
    }
  });
  return words;
}
/**
 * Build the text that precedes the dialogue lines of an ASS file: the
 * [Script Info] section, a [V4+ Styles] section with a "Default" and a
 * "Highlight" style, and the [Events] section heading with its Format line.
 * The returned string ends with a newline.
 *
 * @param {number} videoWidth - Written as PlayResX
 * @param {number} videoHeight - Written as PlayResY
 * @param {string} fontName
 * @param {number} fontSize
 * @param {number} marginV
 * @param {number} marginL
 * @param {number} marginR
 * @param {string} primaryColor - Primary colour of the "Default" style
 * @param {string} highlightColor - Primary colour of the "Highlight" style
 * @returns {string}
 */
function createASSHeader(
  videoWidth = 1920,
  videoHeight = 1080,
  fontName = 'Impact',
  fontSize = 72,
  marginV = 200,
  marginL = 10,
  marginR = 10,
  primaryColor = '&H00FFFFFF',
  highlightColor = '&H0000FF00'
) {
  // Both styles share every field except their name and primary colour
  const styleRow = (styleName, colour) =>
    `Style: ${styleName},${fontName},${fontSize},${colour},&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1`;

  const headerLines = [
    '[Script Info]',
    'Title: Word-by-Word Captions',
    'ScriptType: v4.00+',
    'WrapStyle: 0',
    `PlayResX: ${videoWidth}`,
    `PlayResY: ${videoHeight}`,
    'ScaledBorderAndShadow: yes',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    styleRow('Default', primaryColor),
    styleRow('Highlight', highlightColor),
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ];

  // Every line, including the last, is newline-terminated
  return headerLines.map((line) => `${line}\n`).join('');
}
/**
 * Convert a CSS-style hex colour into an opaque ASS colour literal.
 *
 * Accepts `#RRGGBB` or the `#RGB` shorthand; the leading '#' is optional and
 * surrounding whitespace is ignored. ASS stores the channels in reverse
 * order, so the result has the form `&H00BBGGRR` (upper-cased).
 *
 * @param {string} hexValue
 * @returns {string}
 * @throws {Error} When the value is not a string or not a valid hex colour
 */
function hexToASSColor(hexValue) {
  if (typeof hexValue !== 'string') {
    throw new Error('fontColor values must be hex strings like #RRGGBB');
  }
  // Anchor the optional '#' to the start so a stray '#' elsewhere is rejected
  const match = /^#?([0-9a-fA-F]{6}|[0-9a-fA-F]{3})$/.exec(hexValue.trim());
  if (!match) {
    throw new Error(`Invalid hex color provided: ${hexValue}`);
  }
  let digits = match[1];
  if (digits.length === 3) {
    // Expand shorthand: each digit is doubled (#F80 -> #FF8800)
    digits = [...digits].map((digit) => digit + digit).join('');
  }
  const r = digits.slice(0, 2);
  const g = digits.slice(2, 4);
  const b = digits.slice(4, 6);
  return `&H00${b}${g}${r}`.toUpperCase();
}