Franco Zanardi committed on
Commit
9c093a9
·
1 Parent(s): d366aea

upload streamlit ui

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .vscode
Dockerfile CHANGED
@@ -1,5 +1,6 @@
1
- FROM python:3.9-slim
2
 
 
3
  WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y \
@@ -7,15 +8,33 @@ RUN apt-get update && apt-get install -y \
7
  curl \
8
  software-properties-common \
9
  git \
 
 
 
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
 
12
  COPY requirements.txt ./
 
 
 
 
 
 
 
 
 
 
13
  COPY src/ ./src/
14
 
15
- RUN pip3 install -r requirements.txt
 
 
16
 
17
  EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.10-slim
2
 
3
+ ENV HOME="/app"
4
  WORKDIR /app
5
 
6
  RUN apt-get update && apt-get install -y \
 
8
  curl \
9
  software-properties-common \
10
  git \
11
+ ffmpeg \
12
+ libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libdbus-1-3 \
13
+ libxkbcommon0 libatspi2.0-0 libx11-6 libxcomposite1 libxdamage1 libxext6 \
14
+ libxfixes3 libxrandr2 libgbm1 libasound2 \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
+ WORKDIR /app
18
+
19
  COPY requirements.txt ./
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ ENV PLAYWRIGHT_BROWSERS_PATH="/app/.playwright"
23
+ ENV XDG_CACHE_HOME="/app/.cache"
24
+ RUN playwright install-deps
25
+ RUN playwright install chromium
26
+ RUN python3 -c "import whisper; whisper.load_model('base', download_root='${XDG_CACHE_HOME}/whisper')"
27
+
28
+ RUN useradd -m -u 1000 user
29
+
30
  COPY src/ ./src/
31
 
32
+ RUN chown -R user:user /app
33
+
34
+ USER user
35
 
36
  EXPOSE 8501
37
 
38
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
39
 
40
+ ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -12,9 +12,6 @@ short_description: Add styled subtitles to short videos
12
  license: mit
13
  ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
-
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
12
  license: mit
13
  ---
14
 
15
+ # Pycaps Demo
16
 
17
+ This is a demo of an open-source tool for adding styled subtitles to videos. You can learn more about this tool in the [pycaps GitHub repo](https://github.com/francozanardi/pycaps).
 
 
 
editor/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit.components.v1 as components
2
+ import os
3
+
4
# Declare the "editor" Streamlit component. Its `path` is the directory that
# contains this module, resolved to an absolute path.
_component_func = components.declare_component(
    "editor",
    path=os.path.dirname(os.path.abspath(__file__)),
)
8
+
9
def subtitle_editor(initial_document, key=None):
    """Render the subtitle editor component and return the value it reports.

    Args:
        initial_document: Forwarded to the component as ``initial_document``.
        key: Forwarded to the component as ``key``.

    Returns:
        Whatever the declared component function returns for this call.
    """
    return _component_func(initial_document=initial_document, key=key)
editor/index.html ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Subtitle Editor</title>
6
+ <style>
7
+ body {
8
+ background-color: #282c34;
9
+ color: #abb2bf;
10
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
11
+ margin: 0;
12
+ overflow: hidden;
13
+ }
14
+
15
+ #editor-container {
16
+ padding: 20px;
17
+ height: calc(100vh - 80px);
18
+ overflow-y: auto;
19
+ }
20
+
21
+ .segment {
22
+ background-color: #21252b;
23
+ border: 1px solid #3c4049;
24
+ border-radius: 8px;
25
+ margin-bottom: 20px;
26
+ padding: 15px;
27
+ position: relative;
28
+ }
29
+
30
+ .segment-header {
31
+ display: flex;
32
+ justify-content: space-between;
33
+ align-items: center;
34
+ margin-bottom: 10px;
35
+ padding-bottom: 8px;
36
+ border-bottom: 1px solid #3c4049;
37
+ }
38
+
39
+ .segment-title {
40
+ font-size: 14px;
41
+ color: #61afef;
42
+ font-weight: bold;
43
+ }
44
+
45
+ .segment-controls {
46
+ display: flex;
47
+ gap: 8px;
48
+ }
49
+
50
+ .control-btn {
51
+ background-color: #4b5263;
52
+ border: none;
53
+ border-radius: 4px;
54
+ padding: 4px 8px;
55
+ font-size: 12px;
56
+ color: #abb2bf;
57
+ cursor: pointer;
58
+ transition: background-color 0.2s;
59
+ }
60
+
61
+ .control-btn:hover {
62
+ background-color: #5c6370;
63
+ }
64
+
65
+ .split-btn {
66
+ background-color: #e5c07b;
67
+ color: #282c34;
68
+ }
69
+
70
+ .split-btn:hover {
71
+ background-color: #f0d07b;
72
+ }
73
+
74
+ .merge-btn {
75
+ background-color: #98c379;
76
+ color: #282c34;
77
+ }
78
+
79
+ .merge-btn:hover {
80
+ background-color: #a8d389;
81
+ }
82
+
83
+ .line {
84
+ display: flex;
85
+ align-items: center;
86
+ background-color: #2c313a;
87
+ border: 1px solid #4b515d;
88
+ border-radius: 6px;
89
+ margin-bottom: 8px;
90
+ padding: 8px 12px;
91
+ position: relative;
92
+ flex-wrap: wrap;
93
+ }
94
+
95
+ .line-controls {
96
+ display: flex;
97
+ gap: 6px;
98
+ margin-left: auto;
99
+ margin-right: 8px;
100
+ }
101
+
102
+ .line-control-btn {
103
+ background-color: #4b5263;
104
+ border: none;
105
+ border-radius: 3px;
106
+ padding: 2px 6px;
107
+ font-size: 10px;
108
+ color: #abb2bf;
109
+ cursor: pointer;
110
+ opacity: 0;
111
+ transition: opacity 0.2s, background-color 0.2s;
112
+ }
113
+
114
+ .line:hover .line-control-btn {
115
+ opacity: 1;
116
+ }
117
+
118
+ .line-control-btn:hover {
119
+ background-color: #5c6370;
120
+ }
121
+
122
+ .word {
123
+ display: inline-flex;
124
+ align-items: center;
125
+ background-color: #3b4048;
126
+ border: 1px dashed #6b7280;
127
+ border-radius: 4px;
128
+ padding: 4px 8px;
129
+ margin: 3px;
130
+ cursor: pointer;
131
+ transition: all 0.2s;
132
+ user-select: none;
133
+ position: relative;
134
+ }
135
+
136
+ .word:hover {
137
+ background-color: #4b5263;
138
+ border-style: solid;
139
+ transform: translateY(-1px);
140
+ }
141
+
142
+ .word.selected {
143
+ background-color: #61afef;
144
+ color: #282c34;
145
+ border-color: #61afef;
146
+ }
147
+
148
+ /* Dropdown menu for words */
149
+ .word-dropdown {
150
+ position: fixed;
151
+ background-color: #2c313a;
152
+ border: 1px solid #4b515d;
153
+ border-radius: 6px;
154
+ padding: 8px 0;
155
+ z-index: 1000;
156
+ min-width: 150px;
157
+ box-shadow: 0 4px 12px rgba(0,0,0,0.3);
158
+ display: none;
159
+ }
160
+
161
+ .word-dropdown.show {
162
+ display: block;
163
+ }
164
+
165
+ .dropdown-item {
166
+ padding: 8px 12px;
167
+ cursor: pointer;
168
+ transition: background-color 0.2s;
169
+ display: flex;
170
+ align-items: center;
171
+ gap: 8px;
172
+ }
173
+
174
+ .dropdown-item:hover {
175
+ background-color: #3e4451;
176
+ }
177
+
178
+ .dropdown-separator {
179
+ height: 1px;
180
+ background-color: #4b515d;
181
+ margin: 4px 0;
182
+ }
183
+
184
+ /* Tooltip for tags */
185
+ .word-tags {
186
+ position: absolute;
187
+ bottom: 100%;
188
+ left: 50%;
189
+ transform: translateX(-50%);
190
+ background-color: #21252b;
191
+ color: #c792ea;
192
+ padding: 4px 8px;
193
+ border-radius: 4px;
194
+ font-size: 11px;
195
+ white-space: nowrap;
196
+ opacity: 0;
197
+ visibility: hidden;
198
+ transition: opacity 0.2s, visibility 0.2s;
199
+ z-index: 10;
200
+ margin-bottom: 4px;
201
+ }
202
+
203
+ .word:hover .word-tags {
204
+ opacity: 1;
205
+ visibility: visible;
206
+ }
207
+
208
+ /* Modal for editing tags */
209
+ .modal-overlay {
210
+ position: fixed;
211
+ top: 0;
212
+ left: 0;
213
+ right: 0;
214
+ bottom: 0;
215
+ background-color: rgba(0,0,0,0.7);
216
+ display: flex;
217
+ align-items: center;
218
+ justify-content: center;
219
+ z-index: 2000;
220
+ opacity: 0;
221
+ visibility: hidden;
222
+ transition: opacity 0.2s, visibility 0.2s;
223
+ }
224
+
225
+ .modal-overlay.show {
226
+ opacity: 1;
227
+ visibility: visible;
228
+ }
229
+
230
+ .modal {
231
+ background-color: #21252b;
232
+ border-radius: 8px;
233
+ padding: 20px;
234
+ min-width: 300px;
235
+ max-width: 500px;
236
+ }
237
+
238
+ .modal h3 {
239
+ margin-top: 0;
240
+ color: #61afef;
241
+ }
242
+
243
+ .modal input, .modal textarea {
244
+ box-sizing: border-box;
245
+ width: 100%;
246
+ background-color: #2c313a;
247
+ border: 1px solid #4b515d;
248
+ border-radius: 4px;
249
+ padding: 8px;
250
+ color: #abb2bf;
251
+ font-family: inherit;
252
+ margin-bottom: 10px;
253
+ }
254
+
255
+ .modal-buttons {
256
+ display: flex;
257
+ justify-content: flex-end;
258
+ gap: 10px;
259
+ margin-top: 15px;
260
+ }
261
+
262
+ .footer-controls {
263
+ position: fixed;
264
+ bottom: 0;
265
+ left: 0;
266
+ right: 0;
267
+ background-color: #21252b;
268
+ padding: 15px;
269
+ text-align: right;
270
+ border-top: 1px solid #3c4049;
271
+ }
272
+
273
+ button {
274
+ color: #abb2bf;
275
+ background-color: #4b5263;
276
+ border: none;
277
+ border-radius: 5px;
278
+ padding: 10px 20px;
279
+ font-size: 14px;
280
+ font-weight: bold;
281
+ cursor: pointer;
282
+ margin-left: 10px;
283
+ transition: background-color 0.2s;
284
+ }
285
+
286
+ button:hover {
287
+ background-color: #5c6370;
288
+ }
289
+
290
+ #save-btn {
291
+ background-color: #61afef;
292
+ color: #21252b;
293
+ }
294
+
295
+ #save-btn:hover {
296
+ background-color: #7abfff;
297
+ }
298
+
299
+ .split-indicator {
300
+ position: absolute;
301
+ top: 0;
302
+ bottom: 0;
303
+ width: 2px;
304
+ background-color: #e5c07b;
305
+ opacity: 0;
306
+ transition: opacity 0.2s;
307
+ pointer-events: none;
308
+ }
309
+
310
+ .split-indicator.show {
311
+ opacity: 1;
312
+ }
313
+ </style>
314
+ </head>
315
+ <body>
316
+ <div id="editor-container">Loading...</div>
317
+
318
+ <div class="footer-controls">
319
+ <button id="cancel-btn">Cancel</button>
320
+ <button id="save-btn">Save and Close</button>
321
+ </div>
322
+
323
+ <div id="tag-modal" class="modal-overlay">
324
+ <div class="modal">
325
+ <h3>Edit tags</h3>
326
+ <label>Word: <span id="modal-word-text"></span></label>
327
+ <textarea id="modal-tags" placeholder="Enter tags separated by commas (e.g., noun, important, technical)"></textarea>
328
+ <div class="modal-buttons">
329
+ <button id="modal-cancel">Cancel</button>
330
+ <button id="modal-save">Save</button>
331
+ </div>
332
+ </div>
333
+ </div>
334
+
335
+ <div id="component-value" style="display: none;"></div>
336
+
337
+ <script>
338
+
339
+ // Source: https://discuss.streamlit.io/t/code-snippet-create-components-without-any-frontend-tooling-no-react-babel-webpack-etc/13064
340
/**
 * Post a message to the parent window (the page embedding this component).
 *
 * The message always carries `isStreamlitMessage: true` and the given `type`;
 * the properties of `data` are merged on top of those two fields.
 */
function sendMessageToStreamlitClient(type, data) {
    const message = { isStreamlitMessage: true, type, ...data };
    window.parent.postMessage(message, "*");
}
347
+
348
/** Announce to the host page that the component is ready (API version 1). */
function init() {
    const readyPayload = { apiVersion: 1 };
    sendMessageToStreamlitClient("streamlit:componentReady", readyPayload);
}
351
+
352
/** Ask the host page to give this component's frame the given `height`. */
function setFrameHeight(height) {
    const payload = { height };
    sendMessageToStreamlitClient("streamlit:setFrameHeight", payload);
}
355
+
356
/**
 * Report `data` as the component's value.
 * `data` can be any JSON-serializable value.
 */
function sendDataToPython(data) {
    const payload = { value: data, dataType: "json" };
    sendMessageToStreamlitClient("streamlit:setComponentValue", payload);
}
360
+
361
// JSON form of the document most recently handed to main(). Used to recognise
// render events that carry the same document again.
let lastRenderedDocumentJson = null;

/**
 * Handle a `message` event and (re)start the editor for "streamlit:render".
 *
 * - Messages without a payload, or with a different `type`, are ignored.
 *   The listener is registered on `window`, so it also sees messages that
 *   are not render events.
 * - A render event without `args.initial_document` shows an error message.
 * - main() resets the working copy and registers click handlers on the
 *   static Save/Cancel/modal buttons every time it runs. It is therefore
 *   only called when the incoming document differs from the one already
 *   loaded; otherwise handlers would pile up and in-progress edits would be
 *   thrown away.
 */
function onDataFromPython(event) {
    const message = event.data;
    if (!message || message.type !== "streamlit:render") return;

    const args = message.args || {};
    const initialDocument = args.initial_document;
    if (!initialDocument) {
        // Forget the loaded document so a later valid render rebuilds the editor.
        lastRenderedDocumentJson = null;
        document.getElementById('editor-container').textContent = 'Error: Could not load subtitle data.';
        return;
    }

    const serialized = JSON.stringify(initialDocument);
    if (serialized === lastRenderedDocumentJson) return;

    lastRenderedDocumentJson = serialized;
    main(initialDocument);
}
371
+
372
+ function main(documentData) {
373
+ const editorContainer = document.getElementById('editor-container');
374
+ const tagModal = document.getElementById('tag-modal');
375
+ let documentState = JSON.parse(JSON.stringify(documentData));
376
+ let currentWordDropdown = null;
377
+ let currentEditingWord = null;
378
+
379
/**
 * Rebuild the editor DOM from `documentState`.
 *
 * The container is emptied and one `.segment` element is created per segment,
 * holding a header (title plus an optional "Merge Next" button) and one
 * `.line` element per line. Each line holds its clickable words followed by
 * the line controls.
 */
function render() {
    // Create an element and assign its CSS class in one step.
    const createNode = (tagName, className) => {
        const node = document.createElement(tagName);
        node.className = className;
        return node;
    };

    // Create a button with a label and a click handler.
    const createButton = (className, label, onClick) => {
        const button = createNode('button', className);
        button.textContent = label;
        button.addEventListener('click', onClick);
        return button;
    };

    editorContainer.innerHTML = '';

    const segments = documentState.segments;
    segments.forEach((segment, segIndex) => {
        const segmentNode = createNode('div', 'segment');
        const header = createNode('div', 'segment-header');

        const title = createNode('div', 'segment-title');
        const startLabel = segment.time.start.toFixed(2);
        const endLabel = segment.time.end.toFixed(2);
        title.textContent = `Segment ${segIndex + 1} (${startLabel}s - ${endLabel}s)`;

        // Every segment except the last one can be merged with its successor.
        const controls = createNode('div', 'segment-controls');
        if (segIndex < segments.length - 1) {
            controls.appendChild(
                createButton('control-btn merge-btn', 'Merge Next', () => mergeSegments(segIndex))
            );
        }

        header.appendChild(title);
        header.appendChild(controls);
        segmentNode.appendChild(header);

        segment.lines.forEach((line, lineIndex) => {
            const lineNode = createNode('div', 'line');

            line.words.forEach((word, wordIndex) => {
                const wordNode = createNode('span', 'word');
                wordNode.dataset.segIndex = segIndex;
                wordNode.dataset.lineIndex = lineIndex;
                wordNode.dataset.wordIndex = wordIndex;
                wordNode.textContent = word.text;

                // Words that carry semantic tags get a tooltip listing them.
                const tags = word.semantic_tags;
                if (tags && tags.length > 0) {
                    const tooltip = createNode('span', 'word-tags');
                    tooltip.textContent = tags.map(tag => tag.name).join(', ');
                    wordNode.appendChild(tooltip);
                }

                wordNode.addEventListener('click', (event) => {
                    // Keep the click from reaching the document-level handler.
                    event.stopPropagation();
                    showWordDropdown(wordNode, segIndex, lineIndex, wordIndex);
                });

                lineNode.appendChild(wordNode);
            });

            // Every line except the last one in its segment gets both buttons.
            const lineControls = createNode('div', 'line-controls');
            if (lineIndex < segment.lines.length - 1) {
                lineControls.appendChild(
                    createButton('line-control-btn merge-btn', 'Merge with next line',
                        () => mergeLines(segIndex, lineIndex))
                );
                lineControls.appendChild(
                    createButton('line-control-btn split-btn', 'Split into two segments',
                        () => splitSegments(segIndex, lineIndex))
                );
            }

            lineNode.appendChild(lineControls);
            segmentNode.appendChild(lineNode);
        });

        editorContainer.appendChild(segmentNode);
    });
}
459
+
460
/**
 * Open the action menu for one word, anchored to `wordElement`.
 *
 * Only one menu is open at a time. Choosing an entry runs the matching
 * action for (segIndex, lineIndex, wordIndex) and closes the menu.
 * The menu is placed below the word and kept inside the visible frame:
 * it flips above the word when it would overflow the bottom edge and is
 * shifted left when it would overflow the right edge.
 */
function showWordDropdown(wordElement, segIndex, lineIndex, wordIndex) {
    // Close the previous dropdown, if any.
    if (currentWordDropdown) {
        currentWordDropdown.remove();
        currentWordDropdown = null;
    }

    const dropdown = document.createElement('div');
    dropdown.className = 'word-dropdown show';
    dropdown.innerHTML = `
        <div class="dropdown-item" data-action="edit-text">✏️ Edit Text</div>
        <div class="dropdown-item" data-action="edit-tags">🏷️ Edit Tags</div>
        <div class="dropdown-separator"></div>
        <div class="dropdown-item" data-action="split-after">↓ Split Line After</div>
        <div class="dropdown-separator"></div>
        <div class="dropdown-item" data-action="delete" style="color: #e06c75;">🗑️ Delete Word</div>
    `;

    dropdown.addEventListener('click', (e) => {
        const action = e.target.dataset.action;
        // Clicks on separators or padding carry no action; leave the menu open.
        if (!action) return;

        switch (action) {
            case 'edit-text':
                editWordText(segIndex, lineIndex, wordIndex);
                break;
            case 'edit-tags':
                editWordTags(segIndex, lineIndex, wordIndex);
                break;
            case 'split-after':
                splitLine(segIndex, lineIndex, wordIndex);
                break;
            case 'delete':
                deleteWord(segIndex, lineIndex, wordIndex);
                break;
        }
        dropdown.remove();
        currentWordDropdown = null;
    });

    // Attach first so the menu has a measurable size, then position it.
    document.body.appendChild(dropdown);
    currentWordDropdown = dropdown;

    const anchor = wordElement.getBoundingClientRect();
    const menu = dropdown.getBoundingClientRect();
    const viewportWidth = document.documentElement.clientWidth;
    const viewportHeight = document.documentElement.clientHeight;

    let top = anchor.bottom;
    if (top + menu.height > viewportHeight) {
        // Not enough room below the word: open upwards instead.
        top = Math.max(0, anchor.top - menu.height);
    }
    const left = Math.max(0, Math.min(anchor.left, viewportWidth - menu.width));

    dropdown.style.top = `${top}px`;
    dropdown.style.left = `${left}px`;
}
508
+
509
/**
 * Prompt for a new text for one word and apply it to `documentState`.
 *
 * - Cancelled or blank input leaves the document untouched.
 * - A single token replaces the word, keeping its time range. The trimmed
 *   token is stored, so stray spaces typed in the prompt are not saved.
 * - Several whitespace-separated tokens split the word after confirmation.
 *   The original time range is shared out in proportion to token length.
 *   The first piece starts exactly at the original start and the last piece
 *   ends exactly at the original end; rounding to two decimals is applied
 *   only to the values shown in the confirmation message.
 *
 * Replacement words start with empty tag/clip lists and a zeroed layout.
 */
function editWordText(segIndex, lineIndex, wordIndex) {
    const lineWords = documentState.segments[segIndex].lines[lineIndex].words;
    const word = lineWords[wordIndex];
    const newText = prompt('Edit word (use [SPACE] key to split words):', word.text);

    if (!newText || !newText.trim()) {
        return;
    }

    const pieces = newText.trim().split(/\s+/);

    // Build a fresh word object in the same field order the document uses.
    const makeWord = (text, start, end) => ({
        text,
        time: { start, end },
        semantic_tags: [],
        structure_tags: [],
        clips: [],
        max_layout: { position: {x:0, y:0}, size: {width:0, height:0} }
    });

    if (pieces.length === 1) {
        lineWords.splice(wordIndex, 1, makeWord(pieces[0], word.time.start, word.time.end));
        render();
        return;
    }

    const wordStart = word.time.start;
    const wordEnd = word.time.end;
    const totalDuration = wordEnd - wordStart;
    const totalChars = pieces.reduce((sum, piece) => sum + piece.length, 0);

    // Boundaries come from the cumulative character count, so each piece ends
    // exactly where the next one starts and no rounding error accumulates.
    const boundaryAt = (chars) => wordStart + totalDuration * (chars / totalChars);
    let consumedChars = 0;
    const timedPieces = pieces.map((text, index) => {
        const start = index === 0 ? wordStart : boundaryAt(consumedChars);
        consumedChars += text.length;
        const end = index === pieces.length - 1 ? wordEnd : boundaryAt(consumedChars);
        return { text, start, end };
    });

    const exampleMessage = timedPieces.map(t =>
        `"${t.text}" (${t.start.toFixed(2)}s - ${t.end.toFixed(2)}s)`
    ).join(' and ');

    const warningMessage = `Warning: You are splitting one word into multiple words!\n\n` +
        `Original word: "${word.text}" (${wordStart.toFixed(2)}s - ${wordEnd.toFixed(2)}s)\n` +
        `Will be split into: ${exampleMessage}\n\n` +
        `The original timestamp will be proportionally distributed based on word length.\n\n` +
        `Do you want to proceed with this split?`;

    if (confirm(warningMessage)) {
        const replacementWords = timedPieces.map(t => makeWord(t.text, t.start, t.end));
        lineWords.splice(wordIndex, 1, ...replacementWords);
        render();
    }
}
574
+
575
/**
 * Open the tag modal for one word.
 *
 * Remembers which word is being edited in `currentEditingWord`, fills the
 * modal with the word's text and its current tag names (comma separated),
 * and shows the modal.
 */
function editWordTags(segIndex, lineIndex, wordIndex) {
    const word = documentState.segments[segIndex].lines[lineIndex].words[wordIndex];
    currentEditingWord = { segIndex, lineIndex, wordIndex };

    // render() accepts words without a semantic_tags array; treat that case
    // as "no tags" here too instead of throwing.
    const tagNames = (word.semantic_tags || []).map(t => t.name);

    document.getElementById('modal-word-text').textContent = word.text;
    document.getElementById('modal-tags').value = tagNames.join(', ');

    tagModal.classList.add('show');
}
584
+
585
/**
 * Delete one word after confirmation, then re-render.
 *
 * A line left without words is removed, and a segment left without lines is
 * removed as well.
 */
function deleteWord(segIndex, lineIndex, wordIndex) {
    if (!confirm('Delete this word?')) return;

    const segments = documentState.segments;
    const lines = segments[segIndex].lines;
    const words = lines[lineIndex].words;

    words.splice(wordIndex, 1);
    if (words.length === 0) {
        lines.splice(lineIndex, 1);
    }
    if (lines.length === 0) {
        segments.splice(segIndex, 1);
    }
    render();
}
597
+
598
/**
 * Split a line in two right after the word at `wordIndex`.
 *
 * The words after the split point move to a new line inserted directly below;
 * the new line's time range spans its first to last word, and the original
 * line's end time is pulled back to its new last word. Nothing happens when
 * the split point is not strictly inside the line.
 */
function splitLine(segIndex, lineIndex, wordIndex) {
    const lines = documentState.segments[segIndex].lines;
    const line = lines[lineIndex];
    const splitAt = wordIndex + 1;

    if (!(splitAt > 0 && splitAt < line.words.length)) return;

    const movedWords = line.words.splice(splitAt);
    const firstMoved = movedWords[0];
    const lastMoved = movedWords[movedWords.length - 1];
    const newLine = {
        words: movedWords,
        structure_tags: [],
        time: { start: firstMoved.time.start, end: lastMoved.time.end },
        max_layout: { position: {x:0, y:0}, size: {width:0, height:0} }
    };

    const keptWords = line.words;
    if (keptWords.length > 0) {
        line.time.end = keptWords[keptWords.length - 1].time.end;
    }

    lines.splice(lineIndex + 1, 0, newLine);
    render();
}
619
+
620
/**
 * Merge the line at `lineIndex` with the line that follows it.
 *
 * The next line's words are appended, the merged line's time range is reset
 * to span its first to last word, and the next line is removed. Nothing
 * happens when there is no following line in the segment.
 */
function mergeLines(segIndex, lineIndex) {
    const lines = documentState.segments[segIndex].lines;
    const nextIndex = lineIndex + 1;

    if (!(nextIndex < lines.length)) return;

    const merged = lines[lineIndex];
    merged.words.push(...lines[nextIndex].words);

    const words = merged.words;
    if (words.length > 0) {
        merged.time.start = words[0].time.start;
        merged.time.end = words[words.length - 1].time.end;
    }

    lines.splice(nextIndex, 1);
    render();
}
635
+
636
/**
 * Split a segment in two right after the line at `lineIndex`.
 *
 * The lines after the split point move to a new segment inserted directly
 * after the current one; the new segment's time range spans its first to last
 * line, and the original segment's end time is pulled back to its new last
 * line. Nothing happens when the split point is not strictly inside the
 * segment.
 */
function splitSegments(segIndex, lineIndex) {
    const segments = documentState.segments;
    const segment = segments[segIndex];
    const splitAt = lineIndex + 1;

    if (!(splitAt > 0 && splitAt < segment.lines.length)) return;

    const movedLines = segment.lines.splice(splitAt);
    const firstMoved = movedLines[0];
    const lastMoved = movedLines[movedLines.length - 1];
    const newSegment = {
        lines: movedLines,
        structure_tags: [],
        time: {
            start: firstMoved.time.start,
            end: lastMoved.time.end
        },
        max_layout: { position: {x:0, y:0}, size: {width:0, height:0} }
    };

    const keptLines = segment.lines;
    if (keptLines.length > 0) {
        segment.time.end = keptLines[keptLines.length - 1].time.end;
    }

    segments.splice(segIndex + 1, 0, newSegment);
    render();
}
660
+
661
/**
 * Merge the segment at `segIndex` with the segment that follows it.
 *
 * The next segment's lines are appended, the merged segment's time range is
 * reset to span its first to last line, and the next segment is removed.
 * Nothing happens when there is no following segment.
 */
function mergeSegments(segIndex) {
    const segments = documentState.segments;
    const nextIndex = segIndex + 1;

    if (!(nextIndex < segments.length)) return;

    const merged = segments[segIndex];
    merged.lines.push(...segments[nextIndex].lines);

    const lines = merged.lines;
    if (lines.length > 0) {
        merged.time.start = lines[0].time.start;
        merged.time.end = lines[lines.length - 1].time.end;
    }

    segments.splice(nextIndex, 1);
    render();
}
676
+
677
// Tag modal "Save": replace the edited word's semantic tags with the
// comma-separated names typed in the modal (blank entries are dropped).
document.getElementById('modal-save').addEventListener('click', () => {
    if (!currentEditingWord) return;

    const { segIndex, lineIndex, wordIndex } = currentEditingWord;
    const rawTags = document.getElementById('modal-tags').value;
    const semanticTags = rawTags
        .split(',')
        .map(part => part.trim())
        .filter(Boolean)
        .map(name => ({ name }));

    documentState.segments[segIndex].lines[lineIndex].words[wordIndex].semantic_tags = semanticTags;

    tagModal.classList.remove('show');
    currentEditingWord = null;
    render();
});

// Tag modal "Cancel": close the modal without touching the document.
document.getElementById('modal-cancel').addEventListener('click', () => {
    tagModal.classList.remove('show');
    currentEditingWord = null;
});

// A click anywhere outside the open word dropdown closes it.
document.addEventListener('click', (event) => {
    if (!currentWordDropdown) return;
    if (event.target.closest('.word-dropdown')) return;

    currentWordDropdown.remove();
    currentWordDropdown = null;
});

// "Save and Close": recompute line and segment time ranges from their first
// and last children, then send the document to Python.
document.getElementById('save-btn').addEventListener('click', () => {
    for (const seg of documentState.segments) {
        if (!(seg.lines && seg.lines.length > 0)) continue;

        for (const line of seg.lines) {
            if (!(line.words && line.words.length > 0)) continue;
            const words = line.words;
            line.time.start = words[0].time.start;
            line.time.end = words[words.length - 1].time.end;
        }

        const lines = seg.lines;
        seg.time.start = lines[0].time.start;
        seg.time.end = lines[lines.length - 1].time.end;
    }

    sendDataToPython({ "action": "save", "document": documentState });
});

// "Cancel": report the cancellation to Python without a document.
document.getElementById('cancel-btn').addEventListener('click', () => {
    sendDataToPython({ "action": "cancel" });
});
724
+
725
+ render();
726
+ }
727
+
728
+ window.addEventListener("message", onDataFromPython);
729
+ init();
730
+
731
+ setFrameHeight(700);
732
+ </script>
733
+ </body>
734
+ </html>
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
- altair
2
- pandas
3
- streamlit
 
1
+ streamlit
2
+ git+https://github.com/francozanardi/pycaps.git
 
src/.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ enableXsrfProtection = false
3
+ enableCORS = false
4
+
5
+ [client]
6
+ showErrorDetails = true
7
+
8
+ [browser]
9
+ gatherUsageStats = false
src/streamlit_app.py CHANGED
@@ -1,40 +1,245 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
3
+ import tempfile
4
+ from pathlib import Path
5
+ import shutil
6
+ import uuid
7
+ import glob
8
+ from editor import subtitle_editor
9
 
10
+ from pycaps import Document, WhisperAudioTranscriber, TemplateLoader, logger
11
+ from pycaps.api import ApiKeyService
12
+ import pycaps.video.render.audio_utils as audio_utils
13
+ import logging
14
+
15
# Upper bound on the number of "*.lock" files (one per running job) allowed at once.
MAX_CONCURRENT_JOBS = 2
# Directory under the system temp dir where the job lock files are created.
LOCK_DIR = os.path.join(tempfile.gettempdir(), "pycaps_locks")
os.makedirs(LOCK_DIR, exist_ok=True)
# Upload size limit in bytes (50 * 1024 * 1024); compared against the uploaded file's size.
MAX_VIDEO_SIZE = 50 * 1024 * 1024
logger.set_logging_level(logging.DEBUG)
20
+
21
def acquire_lock_slot():
    """Try to reserve one of the ``MAX_CONCURRENT_JOBS`` job slots.

    Each slot is a fixed-name file ``slot_<i>.lock`` inside ``LOCK_DIR``.
    The file is created with ``O_CREAT | O_EXCL``, so claiming a slot is a
    single atomic filesystem operation. Counting the existing lock files and
    then creating a new one lets two concurrent callers both pass the check
    and exceed the limit; exclusive creation does not.

    Returns:
        The path of the lock file that was created (hand it to
        ``release_lock_slot`` when the job is done), or ``None`` when every
        slot is already taken.
    """
    for slot_index in range(MAX_CONCURRENT_JOBS):
        lock_file_path = os.path.join(LOCK_DIR, f"slot_{slot_index}.lock")
        try:
            # O_EXCL makes the call fail if the file already exists, so only
            # one caller can ever own a given slot.
            fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            continue
        with os.fdopen(fd, "w") as lock_file:
            # The owner's PID is stored for debugging only; nothing reads it back.
            lock_file.write(str(os.getpid()))
        return lock_file_path
    return None
28
+
29
def release_lock_slot(lock_file_path):
    """Delete the lock file at ``lock_file_path``, if there is one.

    A falsy path or a path that does not exist is ignored. Any ``OSError``
    raised while deleting is swallowed on purpose: releasing is best effort.
    """
    if not lock_file_path:
        return
    if not os.path.exists(lock_file_path):
        return
    try:
        os.remove(lock_file_path)
    except OSError:
        # Best effort: the file may have vanished or be undeletable.
        pass
33
+
34
def get_queue_status():
    """Return ``(running_jobs, MAX_CONCURRENT_JOBS)``.

    ``running_jobs`` is the number of ``*.lock`` files currently present in
    ``LOCK_DIR``.
    """
    lock_pattern = os.path.join(LOCK_DIR, "*.lock")
    lock_files = glob.glob(lock_pattern)
    return len(lock_files), MAX_CONCURRENT_JOBS
37
+
38
def setup_api_keys(key_type: str, key: str):
    """Install ``key`` for the provider named by ``key_type``.

    Args:
        key_type: ``"Pycaps API (Recommended)"`` stores the key through
            ``ApiKeyService.set``; ``"OpenAI API"`` stores it in the
            ``PYCAPS_OPENAI_API_KEY`` environment variable. Any other value
            does nothing.
        key: The API key. An empty/falsy key makes the call a no-op.
    """
    if not key:
        return
    if key_type == "OpenAI API":
        os.environ["PYCAPS_OPENAI_API_KEY"] = key
    elif key_type == "Pycaps API (Recommended)":
        ApiKeyService.set(key)
42
+
43
def cleanup_api_keys():
    """Remove any key held by ``ApiKeyService`` and unset ``PYCAPS_OPENAI_API_KEY``."""
    if ApiKeyService.has():
        ApiKeyService.remove()
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop("PYCAPS_OPENAI_API_KEY", None)
46
+
47
def display_video(video_path):
    """Render a player for ``video_path`` plus a button to download the file.

    The download is offered as ``pycaps_<original stem>.mp4`` with MIME type
    ``video/mp4``.
    """
    with st.container():
        st.video(video_path)
        with open(video_path, "rb") as video_file:
            payload = video_file.read()
        download_name = f"pycaps_{Path(video_path).stem}.mp4"
        st.download_button("⬇️ Download Video", payload, download_name, "video/mp4")
52
+
53
def build_pipeline_from_state():
    """Build a pipeline from the video and template stored in the session.

    Returns:
        The object produced by ``TemplateLoader(...).with_input_video(...)
        .load(False).build()``, or ``None`` when the session has no video
        path or no selected template.
    """
    state = st.session_state
    if not (state.video_path and state.selected_template):
        return None

    loader = TemplateLoader(state.selected_template).with_input_video(state.video_path)
    # NOTE(review): the meaning of the positional ``False`` is defined by pycaps;
    # confirm against TemplateLoader.load before changing it.
    return loader.load(False).build()
60
+
61
def go_to_step(step):
    """Store ``step`` in ``st.session_state.current_step``."""
    st.session_state.current_step = step
62
def reset_all():
    """Discard the session's work and return to step 1.

    Deletes the file referenced by ``video_path`` (when it exists on disk),
    removes every session-state entry except the sidebar API key widgets,
    and then sets the current step to 1.
    """
    state = st.session_state
    persisted_keys = ['api_key_type', 'api_key_input']

    stored_video = state.video_path if 'video_path' in state else None
    if stored_video and os.path.exists(stored_video):
        os.remove(stored_video)

    # Iterate over a snapshot of the keys because entries are deleted in the loop.
    for key in list(state.keys()):
        if key in persisted_keys:
            continue
        del state[key]

    go_to_step(1)
69
+
70
# Seed every session-state key the script reads, without overwriting values
# that already exist from a previous run of the script.
for _state_key, _initial_value in (
    ('current_step', 1),
    ('video_path', None),
    ('transcribed_doc', None),
    ('processed_doc', None),
    ('edit_requested', False),
    ('final_video_path', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# The id is generated only when missing, so it stays stable until the key is deleted.
if 'session_id' not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())
if 'selected_template' not in st.session_state:
    st.session_state.selected_template = None
78
+
79
# Page chrome: wide layout, title, and a CSS rule capping the video player height.
st.set_page_config(layout="wide", page_title="Pycaps Demo")
st.title("🎬 Pycaps Demo")
# The doubled braces are literal CSS braces escaped for the f-string.
st.markdown(f"""
<style>
.stElementContainer > video {{
max-height: 60vh;
}}
</style>
""", unsafe_allow_html=True)
# Sidebar: API key selection/entry plus a live count of running jobs.
with st.sidebar:
    st.header("⚙️ API Configuration")
    # api_key_type and api_key are passed to setup_api_keys() in the processing and render steps.
    api_key_type = st.radio("Select API Key Type", ("Pycaps API (Recommended)", "OpenAI API"), key="api_key_type")
    api_key = st.text_input("Enter your API Key", type="password", key="api_key_input")
    st.markdown("---")
    active_jobs, max_jobs = get_queue_status()
    st.metric(label="Jobs Running", value=f"{active_jobs} / {max_jobs}")
95
+
96
+ # ==============================================================================
97
+ # STEP 1: UPLOAD & TRANSCRIBE
98
+ # ==============================================================================
99
if st.session_state.current_step == 1:
    # Step 1: accept an upload, transcribe its audio with Whisper, keep a copy
    # of the video for the later steps, and advance to step 2.
    st.header("Upload Your Video")
    uploaded_file = st.file_uploader(
        "Select a video file (max 50MB)",
        type=["mp4", "mov"],
        key=f"uploader_{st.session_state.session_id}",
    )
    if uploaded_file:
        if uploaded_file.size > MAX_VIDEO_SIZE:
            st.error(f"File is too large ({uploaded_file.size / (1024*1024):.1f}MB). Max is {MAX_VIDEO_SIZE // (1024*1024)}MB.")
        elif st.button("Start", type="primary"):
            with tempfile.TemporaryDirectory() as temp_dir:
                # The file name comes from the client. Joining it as-is would let an
                # absolute name or one containing ".." escape temp_dir, so only the
                # extension (which cannot contain a path separator) is kept.
                upload_suffix = Path(uploaded_file.name).suffix
                video_path = Path(temp_dir) / f"input{upload_suffix}"
                with open(video_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                with st.spinner("Analyzing audio... 🎧"):
                    try:
                        audio_path = os.path.join(temp_dir, "audio.wav")
                        audio_utils.extract_audio_for_whisper(str(video_path), audio_path)
                        transcriber = WhisperAudioTranscriber(model_size="base")
                        document = transcriber.transcribe(audio_path)

                        st.session_state.transcribed_doc = document.to_dict()
                        # temp_dir is deleted when the `with` block exits, so the video is
                        # copied to a per-session path that outlives this script run.
                        persisted_path = os.path.join(tempfile.gettempdir(), f"session_{st.session_state.session_id}.mp4")
                        shutil.copy(video_path, persisted_path)
                        st.session_state.video_path = persisted_path
                        go_to_step(2)
                        st.rerun()
                    except Exception as e:
                        st.error(f"Transcription failed: {e}")
                        import traceback
                        traceback.print_exc()
125
+
126
+ # ==============================================================================
127
+ # STEP 2: CONFIGURE TEMPLATE AND PROCESS
128
+ # ==============================================================================
129
+ elif st.session_state.current_step == 2:
130
+ st.header("Configure & Process")
131
+ template_name = st.selectbox("Choose a Style", ["classic", "word-focus", "line-focus", "minimalist", "neo-minimal", "hype", "retro-gaming", "vibrant"])
132
+ st.session_state.edit_requested = st.checkbox("I want to review and edit the processed subtitles before rendering.", value=st.session_state.edit_requested)
133
+
134
+ if st.button("Next", type="primary"):
135
+ st.session_state.selected_template = template_name
136
+ with st.spinner("Applying template, effects, and tags... ⚙️"):
137
+ try:
138
+ pipeline = build_pipeline_from_state()
139
+ if not pipeline:
140
+ raise RuntimeError("Could not build pipeline. Missing video or template selection.")
141
+
142
+ setup_api_keys(api_key_type, api_key)
143
+ pipeline.prepare()
144
+ document = Document.from_dict(st.session_state.transcribed_doc)
145
+ processed_document = pipeline.process_document(document)
146
+ pipeline.close()
147
+
148
+ st.session_state.processed_doc = processed_document.to_dict()
149
+ go_to_step(3)
150
+ st.rerun()
151
+
152
+
153
+ except Exception as e:
154
+ st.error(f"Processing failed: {e}")
155
+ import traceback; traceback.print_exc()
156
+ finally:
157
+ cleanup_api_keys()
158
+
159
+ if st.button("⬅️ Back"): reset_all(); st.rerun()
160
+
161
+ # ==============================================================================
162
+ # STEP 3: EDIT (OPTIONAL) & RENDER
163
+ # ==============================================================================
164
+ elif st.session_state.current_step == 3:
165
+ if st.session_state.edit_requested:
166
+ st.header("Edit Subtitles")
167
+ st.markdown("Make your changes in the editor below. Your progress is saved automatically when you click 'Save' or 'Cancel'.")
168
+
169
+ editor_result = subtitle_editor(
170
+ initial_document=st.session_state.processed_doc,
171
+ key=f"editor_{st.session_state.session_id}"
172
+ )
173
+
174
+ if editor_result is not None:
175
+ if editor_result.get("action") == "save":
176
+ st.session_state.processed_doc = editor_result.get("document")
177
+ st.toast("✅ Subtitles saved!")
178
+ elif editor_result.get("action") == "cancel":
179
+ st.toast("Editing cancelled.")
180
+
181
+ go_to_step(4)
182
+ st.rerun()
183
+
184
+ else:
185
+ go_to_step(4)
186
+ st.rerun()
187
+
188
+ # ==============================================================================
189
+ # STEP 4: RENDER & VIEW
190
+ # ==============================================================================
191
+ elif st.session_state.current_step == 4:
192
+ st.header("Final Render")
193
+
194
+ lock_file = acquire_lock_slot()
195
+ if not lock_file:
196
+ st.warning("🚧 Renderer is at full capacity. Please try again.")
197
+ if st.button("⬅️ Go Back to Configuration"): go_to_step(2); st.rerun()
198
+ else:
199
+ try:
200
+ with st.spinner("Rendering final video... This is the last step! 🎬"):
201
+ pipeline = build_pipeline_from_state()
202
+ if not pipeline:
203
+ raise RuntimeError("Could not build pipeline for rendering.")
204
+
205
+ setup_api_keys(api_key_type, api_key)
206
+ pipeline.prepare()
207
+
208
+ document_to_render = Document.from_dict(st.session_state.processed_doc)
209
+
210
+ pipeline.render(document_to_render)
211
+
212
+ if pipeline._output_video_path and os.path.exists(pipeline._output_video_path):
213
+ st.session_state.final_video_path = pipeline._output_video_path
214
+ go_to_step(5)
215
+ st.rerun()
216
+ else:
217
+ st.error("Render failed. Check the logs.")
218
+ finally:
219
+ release_lock_slot(lock_file)
220
+ if 'pipeline_instance' in st.session_state:
221
+ st.session_state.pipeline_instance.close()
222
+ del st.session_state.pipeline_instance
223
+
224
+ # ==============================================================================
225
+ # STEP 5: VIEW & DOWNLOAD
226
+ # ==============================================================================
227
+ elif st.session_state.current_step == 5:
228
+ st.header("Your Video is Ready!")
229
+ if 'final_video_path' in st.session_state and st.session_state.final_video_path:
230
+ display_video(st.session_state.final_video_path)
231
+ else:
232
+ st.error("Could not find the final video.")
233
+
234
+ col1, col2 = st.columns(2)
235
+ with col1:
236
+ if st.button("⬅️ Choose Another Style", use_container_width=True):
237
+ keys_to_delete = ['processed_doc', 'final_video_path', 'edit_requested']
238
+ for key in keys_to_delete:
239
+ if key in st.session_state: del st.session_state[key]
240
+ go_to_step(2)
241
+ st.rerun()
242
+ with col2:
243
+ if st.button("🏠 Start with a New Video", use_container_width=True):
244
+ reset_all()
245
+ st.rerun()