shiveshnavin committed on
Commit
8ba367b
·
1 Parent(s): 3d8e650

Add caption addition

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. common-utils +1 -1
  3. server-plugins/generate-captions.js +261 -0
.gitignore CHANGED
@@ -19,4 +19,5 @@ uploads/
19
  uploads/**
20
  audit_log_creds.json
21
  build/
22
- frames/
 
 
19
  uploads/**
20
  audit_log_creds.json
21
  build/
22
+ frames/
23
+ *.ass
common-utils CHANGED
@@ -1 +1 @@
1
- Subproject commit d8e83ba4748273a696d23d0a11df0dc1ae7fae32
 
1
+ Subproject commit 017243cdd3557fe8e78c195b79bdce52b17f5b92
server-plugins/generate-captions.js ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { readFileSync, createWriteStream } from 'fs';
2
+ import path from 'path';
3
+
4
/**
 * Format a time offset in seconds as an ASS timestamp (H:MM:SS.cc).
 *
 * The value is rounded to whole centiseconds *before* it is split into fields,
 * so a rounding carry propagates upward (59.999 -> "0:01:00.00" rather than
 * "0:00:60.00"). Negative, non-finite or non-numeric input is clamped to zero.
 *
 * @param {number} seconds - Offset in seconds.
 * @returns {string} Timestamp with unpadded hours and two-digit minutes,
 *   seconds and centiseconds.
 */
function formatTimestampASS(seconds) {
  const CENTIS_PER_SECOND = 100;
  const CENTIS_PER_MINUTE = 60 * CENTIS_PER_SECOND;
  const CENTIS_PER_HOUR = 60 * CENTIS_PER_MINUTE;

  const rounded = Math.round(Number(seconds) * CENTIS_PER_SECOND);
  const totalCentis = Number.isFinite(rounded) && rounded > 0 ? rounded : 0;

  const hours = Math.floor(totalCentis / CENTIS_PER_HOUR);
  const minutes = Math.floor((totalCentis % CENTIS_PER_HOUR) / CENTIS_PER_MINUTE);
  const secs = Math.floor((totalCentis % CENTIS_PER_MINUTE) / CENTIS_PER_SECOND);
  const centis = totalCentis % CENTIS_PER_SECOND;

  const pad2 = (value) => String(value).padStart(2, '0');
  return `${hours}:${pad2(minutes)}:${pad2(secs)}.${pad2(centis)}`;
}
15
+
16
/**
 * Split a transcript into sentences.
 *
 * A sentence ends at a run of '.', '!' or '?' that is followed by whitespace;
 * the punctuation stays attached to the sentence it terminates. Surrounding
 * whitespace is ignored and blank sentences are never returned, so an empty or
 * whitespace-only transcript yields an empty array.
 *
 * @param {string} transcript - Full transcript text.
 * @returns {string[]} Sentences in their original order.
 */
function splitIntoSentences(transcript) {
  const text = String(transcript ?? '').trim();
  if (text === '') {
    return [];
  }

  // With one capture group, split() yields [text, punct, text, punct, ..., text]:
  // even indices are sentence bodies, odd indices are their terminators.
  const parts = text.split(/([.!?]+)\s+/);
  const sentences = [];

  for (let i = 0; i < parts.length; i += 2) {
    const sentence = parts[i] + (parts[i + 1] ?? '');
    if (sentence.trim() !== '') {
      sentences.push(sentence);
    }
  }

  return sentences;
}
39
+
40
/**
 * Tag each word object with the index of the sentence it belongs to.
 *
 * The transcript is split into sentences, and each sentence claims as many
 * consecutive entries of `words` as it has whitespace-separated tokens. Empty
 * tokens (from blank sentences or stray leading/trailing whitespace) are not
 * counted, so they cannot shift the alignment of later words.
 *
 * The objects in `words` are modified in place: a `sentence_idx` property is
 * set on every word that a sentence claims. Words left over once all sentences
 * are consumed are not touched.
 *
 * @param {Array<Object>} words - Word entries, in transcript order.
 * @param {string} transcript - Full transcript text.
 * @returns {Array<Object>} The same `words` array that was passed in.
 */
function assignSentenceToWords(words, transcript) {
  const sentences = splitIntoSentences(transcript);

  let wordIdx = 0;
  sentences.forEach((sentence, sentIdx) => {
    const tokenCount = sentence.split(/\s+/).filter(Boolean).length;

    for (let k = 0; k < tokenCount && wordIdx < words.length; k++) {
      words[wordIdx].sentence_idx = sentIdx;
      wordIdx++;
    }
  });

  return words;
}
62
+
63
/**
 * Build the header of an ASS subtitle file: the [Script Info] section, two
 * styles ("Default" in white and "Highlight" in green) and the [Events]
 * format line. The returned text ends with a newline so Dialogue lines can be
 * appended directly.
 *
 * Style rows are comma-separated, so commas and line breaks in the font name
 * are replaced with spaces to keep the remaining fields in their columns.
 *
 * @param {number} [videoWidth=1920] - Written as PlayResX.
 * @param {number} [videoHeight=1080] - Written as PlayResY.
 * @param {string} [fontName='Impact'] - Font name used by both styles.
 * @param {number} [fontSize=72] - Font size used by both styles.
 * @param {number} [marginV=200] - Vertical margin of both styles.
 * @param {number} [marginL=10] - Left margin of both styles.
 * @param {number} [marginR=10] - Right margin of both styles.
 * @returns {string} Header text.
 */
function createASSHeader(videoWidth = 1920, videoHeight = 1080, fontName = 'Impact', fontSize = 72, marginV = 200, marginL = 10, marginR = 10) {
  const safeFontName = String(fontName).replace(/[,\r\n]+/g, ' ').trim();

  // Everything after the primary colour is shared by both styles.
  const sharedStyleTail = `&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,2,2,${marginL},${marginR},${marginV},1`;

  return `[Script Info]
Title: Word-by-Word Captions
ScriptType: v4.00+
WrapStyle: 0
PlayResX: ${videoWidth}
PlayResY: ${videoHeight}
ScaledBorderAndShadow: yes

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,${safeFontName},${fontSize},&H00FFFFFF,${sharedStyleTail}
Style: Highlight,${safeFontName},${fontSize},&H0000FF00,${sharedStyleTail}

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
`;
}
90
+
91
/**
 * Generate an ASS subtitle file with word-by-word highlighting.
 *
 * The caption JSON is read from `captionFilePath`; its `words` array supplies
 * the word entries (each is read for `word`, `start` and `end`) and its
 * `transcript` string is used to find sentence boundaries. Words are shown in
 * groups of up to `wordsPerGroup` that never span two sentences. For every
 * word one Dialogue event is written that shows the whole group with that word
 * coloured green; the rotation alternates between +tilt and -tilt from one
 * group to the next.
 *
 * All Dialogue lines are built before the output file is opened, so malformed
 * input fails without leaving a truncated file or an open stream behind.
 *
 * @param {Object} options
 * @param {string} options.captionFilePath - Path to input JSON caption file
 * @param {string} options.outputFilePath - Path to output ASS file
 * @param {number} [options.tiltDegrees=8] - Tilt angle in degrees (alternates between +/-)
 * @param {number} [options.translateY=200] - Distance from bottom in pixels
 * @param {number} [options.widthPercent=80] - Width percentage used to derive the side
 *   margins; clamped to 0-100, non-numeric values fall back to 80
 * @param {string} [options.fontName='Impact'] - Font name
 * @param {number} [options.fontSize=72] - Font size
 * @param {number} [options.wordsPerGroup=4] - Maximum number of words per caption group;
 *   values that are not greater than zero fall back to 4
 * @param {number} [options.videoWidth=1920] - Video width for positioning
 * @param {number} [options.videoHeight=1080] - Video height for positioning
 * @returns {Promise<string>} Resolves with `outputFilePath` once the file is flushed;
 *   rejects if the caption file has no words or the output stream fails.
 */
async function generateCaptions(options) {
  const {
    captionFilePath,
    outputFilePath,
    tiltDegrees = 8,
    translateY = 200,
    widthPercent = 80,
    fontName = 'Impact',
    fontSize = 72,
    wordsPerGroup = 4,
    videoWidth = 1920,
    videoHeight = 1080,
  } = options;

  // Read and parse JSON file
  const jsonData = JSON.parse(readFileSync(captionFilePath, 'utf-8'));
  const transcript = jsonData.transcript || '';
  let words = jsonData.words || [];

  if (words.length === 0) {
    throw new Error('No words found in caption file');
  }

  // Assign sentence indices to words
  words = assignSentenceToWords(words, transcript);

  // A width above 100% would produce negative margins; NaN would leak into the header.
  const numericWidth = Number(widthPercent);
  const usableWidthPercent = Number.isNaN(numericWidth)
    ? 80
    : Math.min(100, Math.max(0, numericWidth));
  const sideMargin = Math.floor((videoWidth * (1 - usableWidthPercent / 100)) / 2);

  // A group size of zero, a negative value or NaN would yield a file with no events.
  const groupSize = wordsPerGroup > 0 ? wordsPerGroup : 4;

  // The explicit position is the same for every event, so build it once.
  const posTag = sideMargin > 0 ? `{\\an2\\pos(${videoWidth / 2},${videoHeight - translateY})}` : '';

  // Build every Dialogue line up front, grouping words within sentence boundaries.
  const dialogueLines = [];
  let groupIdx = 0;
  let i = 0;

  while (i < words.length) {
    // `|| 0` maps a missing index to sentence 0 and guarantees words[i] matches
    // itself, so every pass consumes at least one word.
    const currentSentence = words[i].sentence_idx || 0;

    let j = i;
    while (
      j < words.length &&
      j - i < groupSize &&
      (words[j].sentence_idx || 0) === currentSentence
    ) {
      j++;
    }
    const wordGroup = words.slice(i, j);

    // Alternate tilt
    const currentTilt = groupIdx % 2 === 0 ? tiltDegrees : -tiltDegrees;
    const tiltTag = `{\\frz${currentTilt}}`;

    // One event per word: the whole group is shown while that word is spoken.
    wordGroup.forEach((activeWord, activeIdx) => {
      const captionParts = wordGroup.map((w, idx) => (
        idx === activeIdx
          ? `{\\c&H00FF00&}${w.word}{\\c&HFFFFFF&}` // current word in green, then back to white
          : w.word
      ));
      const captionText = tiltTag + posTag + captionParts.join(' ');

      dialogueLines.push(
        `Dialogue: 0,${formatTimestampASS(activeWord.start)},${formatTimestampASS(activeWord.end)},Default,,0,0,0,,${captionText}\n`,
      );
    });

    i = j;
    groupIdx++;
  }

  // Open the output only now that all content is known to be producible.
  const output = createWriteStream(outputFilePath);

  // Listeners are attached before anything is written so no event can be missed.
  const finished = new Promise((resolve, reject) => {
    output.on('finish', () => {
      console.log(`Created ${outputFilePath} with word-by-word highlighting and tilted groups`);
      resolve(outputFilePath);
    });
    output.on('error', reject);
  });

  // Write header with calculated margins
  output.write(createASSHeader(videoWidth, videoHeight, fontName, fontSize, translateY, sideMargin, sideMargin));
  for (const line of dialogueLines) {
    output.write(line);
  }
  output.end();

  return finished;
}
211
+
212
+ // Example usage
213
+ // Check if this is the main module in ESM
214
+ import { fileURLToPath } from 'url';
215
+ import { dirname } from 'path';
216
+
217
// Command-line entry point: runs only when this file is the script Node was
// started with (process.argv[1] is compared with this module's own path).
const __filename = fileURLToPath(import.meta.url);
const isMainModule = process.argv[1] === __filename;

if (isMainModule) {
  const args = process.argv.slice(2);

  if (args.length < 2) {
    console.log('Usage: node generate-captions.js <input.json> <output.ass> [options]');
    console.log('\nOptions:');
    console.log(' --tilt <degrees> Tilt angle (default: 8)');
    console.log(' --translateY <pixels> Distance from bottom (default: 200)');
    console.log(' --width <percent> Width percentage 0-100 (default: 80)');
    console.log(' --font <name> Font name (default: Impact)');
    console.log(' --fontSize <size> Font size (default: 72)');
    console.log(' --wordsPerGroup <num> Words per caption group (default: 4)');
    console.log(' --videoWidth <pixels> Video width (default: 1920)');
    console.log(' --videoHeight <pixels> Video height (default: 1080)');
    console.log('\nExample:');
    console.log(' node generate-captions.js input.json output.ass --tilt 10 --width 90');
    process.exit(1);
  }

  const [captionFilePath, outputFilePath, ...flagArgs] = args;

  // Parse optional arguments
  const options = {
    captionFilePath,
    outputFilePath,
  };

  const parseInteger = (raw) => Number.parseInt(raw, 10);
  const parseDecimal = (raw) => Number.parseFloat(raw);
  const keepText = (raw) => raw;

  // Flag name -> [option key passed to generateCaptions, value parser].
  const flagSpecs = new Map([
    ['tilt', ['tiltDegrees', parseDecimal]],
    ['translateY', ['translateY', parseInteger]],
    ['width', ['widthPercent', parseDecimal]],
    ['font', ['fontName', keepText]],
    ['fontSize', ['fontSize', parseInteger]],
    ['wordsPerGroup', ['wordsPerGroup', parseInteger]],
    ['videoWidth', ['videoWidth', parseInteger]],
    ['videoHeight', ['videoHeight', parseInteger]],
  ]);

  // Flags come in "--name value" pairs after the two positional paths.
  for (let i = 0; i < flagArgs.length; i += 2) {
    const flag = flagArgs[i].replace(/^--/, '');
    const rawValue = flagArgs[i + 1];
    const spec = flagSpecs.get(flag);

    if (spec === undefined) {
      console.warn(`Ignoring unknown option: ${flagArgs[i]}`);
      continue;
    }

    const [optionKey, parse] = spec;
    const value = rawValue === undefined ? undefined : parse(rawValue);

    // Reject missing or non-numeric values instead of passing undefined/NaN along.
    if (value === undefined || Number.isNaN(value)) {
      console.error(`Invalid or missing value for --${flag}: ${rawValue}`);
      process.exit(1);
    }

    options[optionKey] = value;
  }

  generateCaptions(options).catch((err) => {
    console.error(err);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}

export default { generateCaptions };