Spaces:
Running
Running
Commit ·
8ba367b
1
Parent(s): 3d8e650
Add caption addition
Browse files- .gitignore +2 -1
- common-utils +1 -1
- server-plugins/generate-captions.js +261 -0
.gitignore
CHANGED
|
@@ -19,4 +19,5 @@ uploads/
|
|
| 19 |
uploads/**
|
| 20 |
audit_log_creds.json
|
| 21 |
build/
|
| 22 |
-
frames/
|
|
|
|
|
|
| 19 |
uploads/**
|
| 20 |
audit_log_creds.json
|
| 21 |
build/
|
| 22 |
+
frames/
|
| 23 |
+
*.ass
|
common-utils
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit 017243cdd3557fe8e78c195b79bdce52b17f5b92
|
server-plugins/generate-captions.js
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { readFileSync, createWriteStream } from 'fs';
|
| 2 |
+
import path from 'path';
|
| 3 |
+
|
| 4 |
+
/**
 * Format a time offset in seconds as an ASS timestamp (H:MM:SS.cc).
 *
 * The value is rounded to whole centiseconds BEFORE it is split into
 * hours/minutes/seconds, so a rounding carry propagates correctly
 * (59.999 becomes 0:01:00.00 rather than 0:00:60.00). Negative input is
 * clamped to zero.
 *
 * @param {number} seconds - Time offset in seconds.
 * @returns {string} Timestamp with unpadded hours, two-digit minutes,
 *   two-digit seconds and two-digit centiseconds.
 */
function formatTimestampASS(seconds) {
  const totalCentis = Math.max(0, Math.round(seconds * 100));

  const hours = Math.floor(totalCentis / 360000);
  const minutes = Math.floor((totalCentis % 360000) / 6000);
  const wholeSeconds = Math.floor((totalCentis % 6000) / 100);
  const centis = totalCentis % 100;

  const pad2 = (value) => String(value).padStart(2, '0');

  return `${hours}:${pad2(minutes)}:${pad2(wholeSeconds)}.${pad2(centis)}`;
}
|
| 15 |
+
|
| 16 |
+
/**
 * Split a transcript into sentences.
 *
 * A sentence ends at a run of '.', '!' or '?' that is followed by
 * whitespace; the punctuation stays attached to the sentence and the
 * separating whitespace is dropped. Text after the last such boundary is
 * returned as the final sentence. Surrounding whitespace of the transcript
 * is ignored, and empty fragments are never returned, so an empty or
 * whitespace-only transcript yields an empty array.
 *
 * @param {string} transcript - Full transcript text.
 * @returns {string[]} Sentences in their original order.
 */
function splitIntoSentences(transcript) {
  const text = (transcript ?? '').trim();
  if (text === '') {
    return [];
  }

  // With one capture group, split() yields alternating entries:
  // [text, punctuation, text, punctuation, ..., trailing text].
  const parts = text.split(/([.!?]+)\s+/);
  const sentences = [];

  for (let i = 0; i < parts.length; i += 2) {
    // The trailing text has no punctuation entry after it.
    const sentence = parts[i] + (parts[i + 1] ?? '');
    if (sentence !== '') {
      sentences.push(sentence);
    }
  }

  return sentences;
}
|
| 39 |
+
|
| 40 |
+
/**
 * Tag each word object with the index of the sentence it belongs to.
 *
 * The transcript is split into sentences, and words are matched to them
 * purely by position: the first N words go to sentence 0 where N is the
 * number of whitespace-separated tokens in that sentence, and so on.
 * Empty tokens (from leading/trailing whitespace or an empty sentence) are
 * not counted, so they can no longer consume a real word.
 *
 * The word objects are mutated in place (a `sentence_idx` property is set)
 * and the same array is returned. Words beyond the transcript's total token
 * count are left without a `sentence_idx`.
 *
 * @param {Array<Object>} words - Word objects, in spoken order.
 * @param {string} transcript - Full transcript text.
 * @returns {Array<Object>} The same `words` array.
 */
function assignSentenceToWords(words, transcript) {
  const sentences = splitIntoSentences(transcript);

  let wordIdx = 0;
  sentences.forEach((sentence, sentIdx) => {
    const tokenCount = sentence.split(/\s+/).filter((token) => token !== '').length;

    for (let n = 0; n < tokenCount && wordIdx < words.length; n++) {
      words[wordIdx].sentence_idx = sentIdx;
      wordIdx++;
    }
  });

  return words;
}
|
| 62 |
+
|
| 63 |
+
/**
 * Build the fixed preamble of an ASS subtitle file: the [Script Info]
 * section, the [V4+ Styles] section with a "Default" and a "Highlight"
 * style (identical except for the primary colour), and the [Events]
 * section's Format line.
 *
 * @param {number} videoWidth - Written as PlayResX.
 * @param {number} videoHeight - Written as PlayResY.
 * @param {string} fontName - Font name used by both styles.
 * @param {number} fontSize - Font size used by both styles.
 * @param {number} marginV - Vertical margin used by both styles.
 * @param {number} marginL - Left margin used by both styles.
 * @param {number} marginR - Right margin used by both styles.
 * @returns {string} Header text, terminated by a newline.
 */
function createASSHeader(videoWidth = 1920, videoHeight = 1080, fontName = 'Impact', fontSize = 72, marginV = 200, marginL = 10, marginR = 10) {
  // Everything after the primary colour is shared by both styles.
  const sharedStyleTail = `&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1`;
  const fontFields = `${fontName},${fontSize}`;

  const headerLines = [
    '[Script Info]',
    'Title: Word-by-Word Captions',
    'ScriptType: v4.00+',
    'WrapStyle: 0',
    `PlayResX: ${videoWidth}`,
    `PlayResY: ${videoHeight}`,
    'ScaledBorderAndShadow: yes',
    '',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    `Style: Default,${fontFields},&H00FFFFFF,${sharedStyleTail}`,
    `Style: Highlight,${fontFields},&H0000FF00,${sharedStyleTail}`,
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    '', // produces the trailing newline after the Events Format line
  ];

  return headerLines.join('\n');
}
|
| 90 |
+
|
| 91 |
+
/**
 * Generate an ASS subtitle file with word-by-word highlighting.
 *
 * Words are shown in groups of up to `wordsPerGroup` that never cross a
 * sentence boundary. For every word one Dialogue event is written, lasting
 * from that word's `start` to its `end`, showing the whole group with the
 * current word coloured green via an inline override. Groups alternate
 * between +tiltDegrees and -tiltDegrees rotation.
 *
 * The complete file content is built in memory first and the output file
 * is only opened once generation succeeded, so a failure while building
 * captions neither leaks an open stream nor leaves a partial file behind.
 *
 * @param {Object} options
 * @param {string} options.captionFilePath - Path to input JSON caption file
 *   (read properties: `transcript`, `words`; each word uses `word`, `start`, `end`)
 * @param {string} options.outputFilePath - Path to output ASS file
 * @param {number} [options.tiltDegrees=8] - Tilt angle in degrees (alternates between +/-)
 * @param {number} [options.translateY=200] - Distance from bottom in pixels
 * @param {number} [options.widthPercent=80] - Width percentage for text centering (0-100)
 * @param {string} [options.fontName='Impact'] - Font name
 * @param {number} [options.fontSize=72] - Font size
 * @param {number} [options.wordsPerGroup=4] - Number of words per caption group (must be > 0)
 * @param {number} [options.videoWidth=1920] - Video width for positioning
 * @param {number} [options.videoHeight=1080] - Video height for positioning
 * @returns {Promise<string>} Path to generated ASS file
 * @throws {RangeError} If `wordsPerGroup` is not a positive number
 * @throws {Error} If the caption file contains no words
 */
async function generateCaptions(options) {
  const {
    captionFilePath,
    outputFilePath,
    tiltDegrees = 8,
    translateY = 200,
    widthPercent = 80,
    fontName = 'Impact',
    fontSize = 72,
    wordsPerGroup = 4,
    videoWidth = 1920,
    videoHeight = 1080,
  } = options;

  // A zero, negative or NaN group size would produce no groups at all and
  // silently yield a header-only file, so reject it up front.
  if (!(wordsPerGroup > 0)) {
    throw new RangeError(`wordsPerGroup must be a positive number, got ${wordsPerGroup}`);
  }

  // Read and parse JSON file
  const jsonData = JSON.parse(readFileSync(captionFilePath, 'utf-8'));
  const transcript = jsonData.transcript || '';
  const rawWords = jsonData.words;

  if (!Array.isArray(rawWords) || rawWords.length === 0) {
    throw new Error('No words found in caption file');
  }

  // Assign sentence indices to words
  const words = assignSentenceToWords(rawWords, transcript);

  // Calculate margins for centering within width percentage
  const totalMargin = videoWidth * (1 - widthPercent / 100);
  const sideMargin = Math.floor(totalMargin / 2);

  // Explicit bottom-centre positioning; only emitted when side margins apply.
  // It is the same for every event, so compute it once.
  const posTag = sideMargin > 0 ? `{\\an2\\pos(${videoWidth / 2},${videoHeight - translateY})}` : '';

  // Header with calculated margins, followed by one chunk per Dialogue line
  const chunks = [
    createASSHeader(videoWidth, videoHeight, fontName, fontSize, translateY, sideMargin, sideMargin),
  ];

  // Process words in groups respecting sentence boundaries
  let groupStart = 0;
  let groupIdx = 0;

  while (groupStart < words.length) {
    const currentSentence = words[groupStart].sentence_idx ?? 0;

    // Extend the group up to wordsPerGroup words, stopping at a sentence
    // boundary. The first word always qualifies, so groups are never empty.
    let groupEnd = groupStart;
    while (
      groupEnd < words.length &&
      groupEnd - groupStart < wordsPerGroup &&
      (words[groupEnd].sentence_idx ?? 0) === currentSentence
    ) {
      groupEnd++;
    }
    const wordGroup = words.slice(groupStart, groupEnd);

    // Alternate tilt
    const currentTilt = groupIdx % 2 === 0 ? tiltDegrees : -tiltDegrees;
    const tiltTag = `{\\frz${currentTilt}}`;

    // One event per word: the whole group is shown, current word in green
    wordGroup.forEach((activeWord, activeIdx) => {
      const captionText = wordGroup
        .map((w, idx) => (idx === activeIdx ? `{\\c&H00FF00&}${w.word}{\\c&HFFFFFF&}` : w.word))
        .join(' ');

      chunks.push(
        `Dialogue: 0,${formatTimestampASS(activeWord.start)},${formatTimestampASS(activeWord.end)},Default,,0,0,0,,${tiltTag}${posTag}${captionText}\n`,
      );
    });

    groupStart = groupEnd;
    groupIdx++;
  }

  // Open the file only now that all content exists; listeners are attached
  // before any data is written.
  return new Promise((resolve, reject) => {
    const output = createWriteStream(outputFilePath);
    output.on('error', reject);
    output.on('finish', () => {
      console.log(`Created ${outputFilePath} with word-by-word highlighting and tilted groups`);
      resolve(outputFilePath);
    });
    output.end(chunks.join(''));
  });
}
|
| 211 |
+
|
| 212 |
+
// Example usage
|
| 213 |
+
// Check if this is the main module in ESM
|
| 214 |
+
import { fileURLToPath } from 'url';
|
| 215 |
+
import { dirname } from 'path';
|
| 216 |
+
|
| 217 |
+
const __filename = fileURLToPath(import.meta.url);
// True when this file was started directly (node <this file> ...) rather
// than imported. NOTE(review): this is a plain string comparison, so it may
// not match when the script is launched through a symlink — confirm if that
// matters for deployment.
const isMainModule = process.argv[1] === __filename;

if (isMainModule) {
  // Use the real file name in the usage text so it cannot drift from the script.
  const scriptName = path.basename(__filename);
  const args = process.argv.slice(2);

  if (args.length < 2) {
    console.log(`Usage: node ${scriptName} <input.json> <output.ass> [options]`);
    console.log('\nOptions:');
    console.log(' --tilt <degrees> Tilt angle (default: 8)');
    console.log(' --translateY <pixels> Distance from bottom (default: 200)');
    console.log(' --width <percent> Width percentage 0-100 (default: 80)');
    console.log(' --font <name> Font name (default: Impact)');
    console.log(' --fontSize <size> Font size (default: 72)');
    console.log(' --wordsPerGroup <num> Words per caption group (default: 4)');
    console.log(' --videoWidth <pixels> Video width (default: 1920)');
    console.log(' --videoHeight <pixels> Video height (default: 1080)');
    console.log('\nExample:');
    console.log(` node ${scriptName} input.json output.ass --tilt 10 --width 90`);
    process.exit(1);
  }

  const [captionFilePath, outputFilePath] = args;

  const options = {
    captionFilePath,
    outputFilePath,
  };

  const toInt = (raw) => Number.parseInt(raw, 10);
  const toFloat = (raw) => Number.parseFloat(raw);
  const asIs = (raw) => raw;

  // CLI flag name -> generateCaptions option name and value parser.
  const optionSpecs = new Map([
    ['tilt', { target: 'tiltDegrees', parse: toFloat }],
    ['translateY', { target: 'translateY', parse: toInt }],
    ['width', { target: 'widthPercent', parse: toFloat }],
    ['font', { target: 'fontName', parse: asIs }],
    ['fontSize', { target: 'fontSize', parse: toInt }],
    ['wordsPerGroup', { target: 'wordsPerGroup', parse: toInt }],
    ['videoWidth', { target: 'videoWidth', parse: toInt }],
    ['videoHeight', { target: 'videoHeight', parse: toInt }],
  ]);

  // Optional arguments come as "--flag value" pairs after the two paths.
  for (let i = 2; i < args.length; i += 2) {
    const flag = args[i];
    const value = args[i + 1];
    const spec = optionSpecs.get(flag.replace(/^--/, ''));

    if (spec === undefined) {
      // Unknown flags were always skipped; now at least say so.
      console.warn(`Ignoring unknown option: ${flag}`);
      continue;
    }

    if (value === undefined) {
      console.error(`Missing value for option: ${flag}`);
      process.exit(1);
    }

    const parsed = spec.parse(value);
    if (typeof parsed === 'number' && Number.isNaN(parsed)) {
      console.error(`Invalid numeric value for option ${flag}: ${value}`);
      process.exit(1);
    }

    options[spec.target] = parsed;
  }

  generateCaptions(options).catch((err) => {
    console.error(err);
    // Report failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
  });
}
|
| 260 |
+
|
| 261 |
+
export default { generateCaptions };
|