duck3-create Claude Opus 4.6 committed on
Commit
0e7d362
·
0 Parent(s):

Initial commit: YouTube transcript extraction web service

Browse files

FastAPI backend with youtube-transcript-api for extracting subtitles
from YouTube videos. Supports batch processing (up to 20 URLs),
text/JSON output, Korean noise removal, and dark/light theme frontend.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (6) hide show
  1. .gitignore +11 -0
  2. Procfile +1 -0
  3. main.py +179 -0
  4. requirements.txt +3 -0
  5. runtime.txt +1 -0
  6. static/index.html +842 -0
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .env
8
+ .venv/
9
+ venv/
10
+ env/
11
+ .omc/
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn main:app --host 0.0.0.0 --port ${PORT:-8000}
main.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.responses import FileResponse, JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+ import re
7
+ import asyncio
8
+ from concurrent.futures import ThreadPoolExecutor
9
+
10
# ASGI application object; the Procfile launches it as ``main:app``.
app = FastAPI(title="YouTube Transcript Extractor")

# CORS is left fully open: every origin, method and header is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Shared worker pool used to run the blocking transcript fetches off the
# event loop; at most five fetches run at the same time.
_executor = ThreadPoolExecutor(max_workers=5)

# Single client instance reused by every request.
_yt_api = YouTubeTranscriptApi()
21
+
22
+
23
class TranscriptRequest(BaseModel):
    """JSON body accepted by ``POST /api/transcripts``."""

    # Raw user input: full YouTube URLs or bare video IDs, one per entry.
    urls: list[str]
    # Preferred subtitle language code; defaults to Korean.
    language: str = "ko"
    # When true, filler-only lines, bracketed cue lines and consecutive
    # duplicate lines are removed from the transcript.
    denoise: bool = False
    # "json" yields timed entries; any other value yields plain text.
    format: str = "text"
28
+
29
+
30
+ def extract_video_id(url: str) -> str | None:
31
+ url = url.strip()
32
+ if not url:
33
+ return None
34
+ patterns = [
35
+ r"(?:youtube\.com/watch\?.*v=)([a-zA-Z0-9_-]{11})",
36
+ r"(?:youtu\.be/)([a-zA-Z0-9_-]{11})",
37
+ r"(?:youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
38
+ r"(?:youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
39
+ r"^([a-zA-Z0-9_-]{11})$",
40
+ ]
41
+ for pattern in patterns:
42
+ match = re.search(pattern, url)
43
+ if match:
44
+ return match.group(1)
45
+ return None
46
+
47
+
48
# Stand-alone Korean interjections. A caption line is treated as noise only
# when the whole (stripped) line is exactly one of these strings.
KOREAN_FILLERS = {
    "어", "음", "그", "아", "네", "예", "에", "으", "흠",
    "어어", "음음", "아아", "네네", "예예",
}

# Matches a line that consists solely of one or more bracketed cues such as
# "[음악]" or "[Music] [Applause]". The bracket contents may not themselves
# contain brackets, so a line like "[Music] hello [Applause]" is NOT matched;
# a greedy ``\[.*\]`` would swallow the spoken text between the two cues.
NOISE_PATTERN = re.compile(r"^(?:\[[^\[\]]*\]\s*)+$")
54
+
55
+
56
def denoise_text(text: str) -> str:
    """Strip noise lines from a newline-separated transcript.

    Each line is whitespace-trimmed. A line is discarded when it is empty,
    is exactly one of ``KOREAN_FILLERS``, matches ``NOISE_PATTERN``, or
    repeats the line that was kept immediately before it.

    Returns:
        The surviving lines joined with ``"\\n"``.
    """
    kept: list[str] = []
    for raw_line in text.split("\n"):
        candidate = raw_line.strip()
        is_noise = (
            not candidate
            or candidate in KOREAN_FILLERS
            or NOISE_PATTERN.match(candidate) is not None
        )
        # ``kept[-1]`` is always the most recently emitted line, so comparing
        # against it collapses consecutive duplicates.
        if is_noise or (kept and kept[-1] == candidate):
            continue
        kept.append(candidate)
    return "\n".join(kept)
73
+
74
+
75
def _fetch_transcript(video_id: str, language: str, denoise: bool, fmt: str) -> dict:
    """Fetch one video's transcript and shape it for the API response.

    This function blocks on network I/O.

    Args:
        video_id: YouTube video ID.
        language: Preferred language code. "ko" and "en" fall back to each
            other; any other code is requested on its own.
        denoise: Drop filler-only entries, bracketed cue entries and
            consecutive duplicates.
        fmt: "json" returns a list of ``{"text", "start", "duration"}``
            dicts; any other value returns newline-joined text.

    Returns:
        ``{"transcript": <data>, "error": None}`` on success, or
        ``{"transcript": None, "error": <message>}`` on failure. Errors from
        the fetch itself are reported in the dict rather than raised.
    """
    # Function-scope import of the library's exception types (the package is
    # already a dependency of this module). Classifying by type is reliable;
    # the library's messages all share the same "Could not retrieve ..."
    # prefix, so testing for that text first hides the specific cause.
    from youtube_transcript_api import (
        NoTranscriptFound,
        TranscriptsDisabled,
        VideoUnavailable,
    )

    languages = [language]
    fallback = {"ko": "en", "en": "ko"}.get(language)
    if fallback is not None:
        languages.append(fallback)

    try:
        data = _yt_api.fetch(video_id, languages=languages)

        if fmt == "json":
            entries = [
                {"text": e.text, "start": e.start, "duration": e.duration}
                for e in data
            ]
            if denoise:
                deduped = []
                prev_text = None
                for entry in entries:
                    t = entry["text"].strip()
                    if not t or t in KOREAN_FILLERS or NOISE_PATTERN.match(t):
                        continue
                    if t == prev_text:
                        continue
                    entry["text"] = t
                    deduped.append(entry)
                    prev_text = t
                entries = deduped
            return {"transcript": entries, "error": None}

        text = "\n".join(e.text for e in data)
        if denoise:
            text = denoise_text(text)
        return {"transcript": text, "error": None}
    except TranscriptsDisabled:
        error_msg = "이 영상은 자막이 비활성화되어 있습니다."
    except VideoUnavailable:
        error_msg = "영상을 찾을 수 없습니다."
    except NoTranscriptFound:
        error_msg = "자막을 찾을 수 없습니다."
    except Exception as e:
        # Anything else: fall back to message inspection, most specific
        # keyword first so the generic prefix cannot shadow the others.
        error_msg = str(e)
        lowered = error_msg.lower()
        if "disabled" in lowered:
            error_msg = "이 영상은 자막이 비활성화되어 있습니다."
        elif "unavailable" in lowered:
            error_msg = "영상을 찾을 수 없습니다."
        elif "No transcripts" in error_msg or "Could not retrieve" in error_msg:
            error_msg = "자막을 찾을 수 없습니다."
    return {"transcript": None, "error": error_msg}
119
+
120
+
121
@app.post("/api/transcripts")
async def get_transcripts(request: TranscriptRequest):
    """Extract transcripts for a batch of YouTube URLs.

    Blank entries are discarded first; the 20-URL limit applies to what
    remains. Every remaining URL is processed concurrently on the shared
    thread pool and reported individually, so one bad URL does not fail the
    whole batch.

    Returns:
        A dict with ``results`` (one item per URL, in input order, each with
        ``url``, ``video_id``, ``transcript`` and ``error``), ``total``,
        ``success_count`` and ``error_count``; or a 400 ``JSONResponse``
        with an ``error`` message when the batch is empty or too large.
    """
    max_urls = 20

    urls = [cleaned for raw in request.urls if (cleaned := raw.strip())]
    if not urls:
        return JSONResponse(
            status_code=400,
            content={"error": "URL을 하나 이상 입력해주세요."},
        )
    if len(urls) > max_urls:
        return JSONResponse(
            status_code=400,
            content={"error": "최대 20개의 URL만 처리할 수 있습니다."},
        )

    # Inside a coroutine the running loop is the one to use;
    # get_running_loop() is the documented way to obtain it.
    loop = asyncio.get_running_loop()

    async def process_url(url: str):
        video_id = extract_video_id(url)
        if not video_id:
            return {
                "url": url,
                "video_id": None,
                "transcript": None,
                "error": "유효하지 않은 YouTube URL입니다.",
            }

        # The fetch is blocking, so it runs on the worker pool.
        result = await loop.run_in_executor(
            _executor,
            _fetch_transcript,
            video_id,
            request.language,
            request.denoise,
            request.format,
        )

        return {
            "url": url,
            "video_id": video_id,
            "transcript": result["transcript"],
            "error": result["error"],
        }

    # gather() preserves input order in its result list.
    results = list(await asyncio.gather(*(process_url(url) for url in urls)))

    success_count = sum(1 for r in results if r["error"] is None)

    return {
        "results": results,
        "total": len(urls),
        "success_count": success_count,
        "error_count": len(results) - success_count,
    }
175
+
176
+
177
@app.get("/")
async def root():
    """Serve the single-page frontend (``static/index.html``)."""
    # Function-scope import keeps this handler self-contained.
    from pathlib import Path

    # Resolve the file relative to this module rather than the process
    # working directory, so the page is found regardless of where the
    # server is started from.
    index_path = Path(__file__).resolve().parent / "static" / "index.html"
    return FileResponse(index_path)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ youtube-transcript-api
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
static/index.html ADDED
@@ -0,0 +1,842 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="ko">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>YouTube Transcript</title>
7
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/orioncactus/pretendard/dist/web/static/pretendard.min.css">
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
11
+ <style>
12
+ :root {
13
+ --bg-primary: #fafafa;
14
+ --bg-secondary: #ffffff;
15
+ --bg-surface: #f4f4f5;
16
+ --border: #e4e4e7;
17
+ --text-primary: #18181b;
18
+ --text-secondary: #71717a;
19
+ --text-tertiary: #a1a1aa;
20
+ --accent: #4f46e5;
21
+ --accent-hover: #4338ca;
22
+ --accent-light: rgba(79, 70, 229, 0.08);
23
+ --error: #ef4444;
24
+ --error-light: rgba(239, 68, 68, 0.08);
25
+ --success: #16a34a;
26
+ --font-sans: 'Pretendard', -apple-system, BlinkMacSystemFont, system-ui, sans-serif;
27
+ --font-mono: 'JetBrains Mono', monospace;
28
+ --radius: 8px;
29
+ --transition: 200ms ease;
30
+ }
31
+
32
+ @media (prefers-color-scheme: dark) {
33
+ :root {
34
+ --bg-primary: #111113;
35
+ --bg-secondary: #1b1b1f;
36
+ --bg-surface: #232328;
37
+ --border: #2e2e35;
38
+ --text-primary: #ededef;
39
+ --text-secondary: #8f8f96;
40
+ --text-tertiary: #5c5c63;
41
+ --accent: #6366f1;
42
+ --accent-hover: #818cf8;
43
+ --accent-light: rgba(99, 102, 241, 0.1);
44
+ --error: #f87171;
45
+ --error-light: rgba(248, 113, 113, 0.1);
46
+ --success: #4ade80;
47
+ }
48
+ }
49
+
50
+ *, *::before, *::after {
51
+ box-sizing: border-box;
52
+ margin: 0;
53
+ padding: 0;
54
+ }
55
+
56
+ html {
57
+ font-size: 16px;
58
+ -webkit-font-smoothing: antialiased;
59
+ -moz-osx-font-smoothing: grayscale;
60
+ }
61
+
62
+ body {
63
+ font-family: var(--font-sans);
64
+ background: var(--bg-primary);
65
+ color: var(--text-primary);
66
+ line-height: 1.6;
67
+ letter-spacing: -0.01em;
68
+ min-height: 100vh;
69
+ }
70
+
71
+ .container {
72
+ max-width: 680px;
73
+ margin: 0 auto;
74
+ padding: 64px 24px 96px;
75
+ }
76
+
77
+ /* Header */
78
+ header {
79
+ margin-bottom: 48px;
80
+ }
81
+
82
+ header h1 {
83
+ font-size: 28px;
84
+ font-weight: 700;
85
+ letter-spacing: -0.03em;
86
+ line-height: 1.2;
87
+ color: var(--text-primary);
88
+ }
89
+
90
+ header .subtitle {
91
+ font-size: 14px;
92
+ color: var(--text-secondary);
93
+ margin-top: 8px;
94
+ font-weight: 400;
95
+ }
96
+
97
+ /* Input Section */
98
+ .input-section {
99
+ display: flex;
100
+ flex-direction: column;
101
+ gap: 20px;
102
+ }
103
+
104
+ textarea {
105
+ width: 100%;
106
+ min-height: 160px;
107
+ padding: 16px;
108
+ font-family: var(--font-mono);
109
+ font-size: 14px;
110
+ line-height: 1.7;
111
+ color: var(--text-primary);
112
+ background: var(--bg-surface);
113
+ border: 1px solid var(--border);
114
+ border-radius: var(--radius);
115
+ resize: vertical;
116
+ outline: none;
117
+ transition: border-color var(--transition);
118
+ }
119
+
120
+ textarea::placeholder {
121
+ color: var(--text-tertiary);
122
+ }
123
+
124
+ textarea:focus {
125
+ border-color: var(--accent);
126
+ }
127
+
128
+ /* Options */
129
+ .options {
130
+ display: flex;
131
+ flex-wrap: wrap;
132
+ gap: 16px;
133
+ align-items: center;
134
+ }
135
+
136
+ .option-group {
137
+ display: flex;
138
+ align-items: center;
139
+ gap: 8px;
140
+ }
141
+
142
+ .option-label {
143
+ font-size: 12px;
144
+ font-weight: 500;
145
+ color: var(--text-secondary);
146
+ text-transform: uppercase;
147
+ letter-spacing: 0.05em;
148
+ }
149
+
150
+ /* Toggle Group */
151
+ .toggle-group {
152
+ display: flex;
153
+ border: 1px solid var(--border);
154
+ border-radius: var(--radius);
155
+ overflow: hidden;
156
+ }
157
+
158
+ .toggle-btn {
159
+ padding: 6px 14px;
160
+ font-family: var(--font-sans);
161
+ font-size: 13px;
162
+ font-weight: 500;
163
+ color: var(--text-secondary);
164
+ background: transparent;
165
+ border: none;
166
+ cursor: pointer;
167
+ transition: all var(--transition);
168
+ }
169
+
170
+ .toggle-btn + .toggle-btn {
171
+ border-left: 1px solid var(--border);
172
+ }
173
+
174
+ .toggle-btn.active {
175
+ background: var(--accent);
176
+ color: #fff;
177
+ }
178
+
179
+ .toggle-btn:hover:not(.active) {
180
+ background: var(--accent-light);
181
+ color: var(--text-primary);
182
+ }
183
+
184
+ /* Custom Checkbox */
185
+ .checkbox-wrapper {
186
+ display: flex;
187
+ align-items: center;
188
+ gap: 8px;
189
+ cursor: pointer;
190
+ font-size: 13px;
191
+ color: var(--text-secondary);
192
+ user-select: none;
193
+ }
194
+
195
+ .checkbox-wrapper input {
196
+ position: absolute;
197
+ opacity: 0;
198
+ width: 0;
199
+ height: 0;
200
+ }
201
+
202
+ .checkbox-custom {
203
+ width: 16px;
204
+ height: 16px;
205
+ border: 1.5px solid var(--border);
206
+ border-radius: 4px;
207
+ transition: all var(--transition);
208
+ position: relative;
209
+ flex-shrink: 0;
210
+ }
211
+
212
+ .checkbox-wrapper:hover .checkbox-custom {
213
+ border-color: var(--accent);
214
+ }
215
+
216
+ .checkbox-wrapper input:checked + .checkbox-custom {
217
+ background: var(--accent);
218
+ border-color: var(--accent);
219
+ }
220
+
221
+ .checkbox-wrapper input:checked + .checkbox-custom::after {
222
+ content: '';
223
+ position: absolute;
224
+ left: 4.5px;
225
+ top: 1.5px;
226
+ width: 5px;
227
+ height: 9px;
228
+ border: solid #fff;
229
+ border-width: 0 1.5px 1.5px 0;
230
+ transform: rotate(45deg);
231
+ }
232
+
233
+ /* Buttons */
234
+ .btn {
235
+ font-family: var(--font-sans);
236
+ font-size: 14px;
237
+ font-weight: 500;
238
+ border-radius: var(--radius);
239
+ cursor: pointer;
240
+ transition: all var(--transition);
241
+ border: none;
242
+ outline: none;
243
+ }
244
+
245
+ .btn-primary {
246
+ background: var(--accent);
247
+ color: #fff;
248
+ width: 100%;
249
+ padding: 12px;
250
+ }
251
+
252
+ .btn-primary:hover {
253
+ background: var(--accent-hover);
254
+ }
255
+
256
+ .btn-primary:disabled {
257
+ opacity: 0.5;
258
+ cursor: not-allowed;
259
+ }
260
+
261
+ .btn-secondary {
262
+ background: transparent;
263
+ color: var(--text-secondary);
264
+ border: 1px solid var(--border);
265
+ padding: 7px 14px;
266
+ font-size: 13px;
267
+ }
268
+
269
+ .btn-secondary:hover {
270
+ background: var(--bg-surface);
271
+ color: var(--text-primary);
272
+ }
273
+
274
+ .btn-sm {
275
+ padding: 5px 10px;
276
+ font-size: 12px;
277
+ }
278
+
279
+ /* Loading */
280
+ .loading-section {
281
+ margin-top: 48px;
282
+ }
283
+
284
+ .progress-bar {
285
+ width: 100%;
286
+ height: 2px;
287
+ background: var(--border);
288
+ border-radius: 1px;
289
+ overflow: hidden;
290
+ }
291
+
292
+ .progress-bar-inner {
293
+ width: 30%;
294
+ height: 100%;
295
+ background: var(--accent);
296
+ border-radius: 1px;
297
+ animation: progress 1.5s ease-in-out infinite;
298
+ }
299
+
300
+ @keyframes progress {
301
+ 0% { transform: translateX(-100%); }
302
+ 50% { transform: translateX(200%); }
303
+ 100% { transform: translateX(400%); }
304
+ }
305
+
306
+ .loading-text {
307
+ margin-top: 16px;
308
+ font-size: 14px;
309
+ color: var(--text-secondary);
310
+ text-align: center;
311
+ animation: pulse 2s ease-in-out infinite;
312
+ }
313
+
314
+ @keyframes pulse {
315
+ 0%, 100% { opacity: 1; }
316
+ 50% { opacity: 0.4; }
317
+ }
318
+
319
+ /* Results */
320
+ .results-section {
321
+ margin-top: 48px;
322
+ }
323
+
324
+ .results-header {
325
+ display: flex;
326
+ justify-content: space-between;
327
+ align-items: center;
328
+ margin-bottom: 20px;
329
+ }
330
+
331
+ .results-stats {
332
+ font-size: 14px;
333
+ color: var(--text-secondary);
334
+ }
335
+
336
+ .results-stats .success-count {
337
+ color: var(--success);
338
+ font-weight: 600;
339
+ }
340
+
341
+ .results-stats .error-count {
342
+ color: var(--error);
343
+ font-weight: 600;
344
+ }
345
+
346
+ .results-actions {
347
+ display: flex;
348
+ gap: 8px;
349
+ }
350
+
351
+ .results-list {
352
+ display: flex;
353
+ flex-direction: column;
354
+ gap: 16px;
355
+ }
356
+
357
+ /* Result Card */
358
+ .result-card {
359
+ background: var(--bg-surface);
360
+ border: 1px solid var(--border);
361
+ border-radius: var(--radius);
362
+ padding: 20px;
363
+ transition: box-shadow var(--transition);
364
+ animation: fadeIn 0.35s ease forwards;
365
+ opacity: 0;
366
+ }
367
+
368
+ .result-card:hover {
369
+ box-shadow: 0 1px 6px rgba(0, 0, 0, 0.04);
370
+ }
371
+
372
+ @media (prefers-color-scheme: dark) {
373
+ .result-card:hover {
374
+ box-shadow: 0 1px 6px rgba(0, 0, 0, 0.25);
375
+ }
376
+ }
377
+
378
+ @keyframes fadeIn {
379
+ from {
380
+ opacity: 0;
381
+ transform: translateY(8px);
382
+ }
383
+ to {
384
+ opacity: 1;
385
+ transform: translateY(0);
386
+ }
387
+ }
388
+
389
+ .result-card-header {
390
+ display: flex;
391
+ justify-content: space-between;
392
+ align-items: center;
393
+ margin-bottom: 12px;
394
+ gap: 12px;
395
+ }
396
+
397
+ .result-card-id {
398
+ font-family: var(--font-mono);
399
+ font-size: 13px;
400
+ color: var(--text-secondary);
401
+ overflow: hidden;
402
+ text-overflow: ellipsis;
403
+ white-space: nowrap;
404
+ min-width: 0;
405
+ }
406
+
407
+ .result-card-actions {
408
+ display: flex;
409
+ gap: 6px;
410
+ flex-shrink: 0;
411
+ }
412
+
413
+ .result-card-content {
414
+ font-family: var(--font-mono);
415
+ font-size: 13px;
416
+ line-height: 1.7;
417
+ color: var(--text-primary);
418
+ white-space: pre-wrap;
419
+ word-break: break-word;
420
+ max-height: 320px;
421
+ overflow-y: auto;
422
+ padding: 14px;
423
+ background: var(--bg-secondary);
424
+ border: 1px solid var(--border);
425
+ border-radius: 6px;
426
+ }
427
+
428
+ .result-card.is-error {
429
+ border-color: var(--error);
430
+ background: var(--error-light);
431
+ }
432
+
433
+ .result-card.is-error .result-card-content {
434
+ color: var(--error);
435
+ background: transparent;
436
+ border: none;
437
+ padding: 0;
438
+ max-height: none;
439
+ font-family: var(--font-sans);
440
+ font-size: 14px;
441
+ }
442
+
443
+ /* Scrollbar */
444
+ .result-card-content::-webkit-scrollbar {
445
+ width: 4px;
446
+ }
447
+
448
+ .result-card-content::-webkit-scrollbar-track {
449
+ background: transparent;
450
+ }
451
+
452
+ .result-card-content::-webkit-scrollbar-thumb {
453
+ background: var(--border);
454
+ border-radius: 2px;
455
+ }
456
+
457
+ /* Keyboard hint */
458
+ .keyboard-hint {
459
+ font-size: 12px;
460
+ color: var(--text-tertiary);
461
+ text-align: center;
462
+ margin-top: 8px;
463
+ }
464
+
465
+ kbd {
466
+ font-family: var(--font-mono);
467
+ font-size: 11px;
468
+ padding: 2px 5px;
469
+ background: var(--bg-surface);
470
+ border: 1px solid var(--border);
471
+ border-radius: 4px;
472
+ }
473
+
474
+ /* Copy feedback */
475
+ .btn.copied {
476
+ color: var(--success);
477
+ border-color: var(--success);
478
+ }
479
+
480
+ /* Responsive */
481
+ @media (max-width: 640px) {
482
+ .container {
483
+ padding: 32px 16px 64px;
484
+ }
485
+
486
+ header {
487
+ margin-bottom: 32px;
488
+ }
489
+
490
+ .options {
491
+ gap: 12px;
492
+ }
493
+
494
+ .results-header {
495
+ flex-direction: column;
496
+ align-items: flex-start;
497
+ gap: 12px;
498
+ }
499
+ }
500
+ </style>
501
+ </head>
502
+ <body>
503
+ <div class="container">
504
+ <header>
505
+ <h1>YouTube Transcript</h1>
506
+ <p class="subtitle">YouTube 영상의 자막을 텍스트로 추출합니다</p>
507
+ </header>
508
+
509
+ <section class="input-section">
510
+ <textarea id="urlInput" placeholder="YouTube URL을 한 줄에 하나씩 입력하세요 (최대 20개)&#10;&#10;https://www.youtube.com/watch?v=...&#10;https://youtu.be/..."></textarea>
511
+
512
+ <div class="options">
513
+ <div class="option-group">
514
+ <span class="option-label">형식</span>
515
+ <div class="toggle-group" data-name="format">
516
+ <button class="toggle-btn active" data-value="text">Text</button>
517
+ <button class="toggle-btn" data-value="json">JSON</button>
518
+ </div>
519
+ </div>
520
+
521
+ <div class="option-group">
522
+ <span class="option-label">언어</span>
523
+ <div class="toggle-group" data-name="language">
524
+ <button class="toggle-btn active" data-value="ko">한국어</button>
525
+ <button class="toggle-btn" data-value="en">English</button>
526
+ </div>
527
+ </div>
528
+
529
+ <div class="option-group">
530
+ <label class="checkbox-wrapper">
531
+ <input type="checkbox" id="denoise">
532
+ <span class="checkbox-custom"></span>
533
+ <span>노이즈 제거</span>
534
+ </label>
535
+ </div>
536
+
537
+ <div class="option-group">
538
+ <label class="checkbox-wrapper">
539
+ <input type="checkbox" id="metadata" checked>
540
+ <span class="checkbox-custom"></span>
541
+ <span>메타데이터 포함</span>
542
+ </label>
543
+ </div>
544
+ </div>
545
+
546
+ <button id="extractBtn" class="btn btn-primary">자막 추출</button>
547
+ <p class="keyboard-hint"><kbd>Ctrl</kbd> + <kbd>Enter</kbd> 로 바로 실행</p>
548
+ </section>
549
+
550
+ <div id="loading" class="loading-section" style="display: none;">
551
+ <div class="progress-bar">
552
+ <div class="progress-bar-inner"></div>
553
+ </div>
554
+ <p class="loading-text">자막을 추출하고 있습니다</p>
555
+ </div>
556
+
557
+ <section id="results" class="results-section" style="display: none;">
558
+ <div class="results-header">
559
+ <div class="results-stats" id="stats"></div>
560
+ <div class="results-actions">
561
+ <button class="btn btn-secondary" id="copyAllBtn">전체 복사</button>
562
+ <button class="btn btn-secondary" id="downloadAllBtn">전체 다운로드</button>
563
+ </div>
564
+ </div>
565
+ <div id="resultsList" class="results-list"></div>
566
+ </section>
567
+ </div>
568
+
569
+ <script>
570
+ (function () {
571
// Mutable UI state: the two toggle selections and the last API response.
let currentFormat = 'text';
let currentLanguage = 'ko';
let currentResults = null;

// Short selector helpers used throughout this script.
const $ = (sel) => document.querySelector(sel);
const $$ = (sel) => document.querySelectorAll(sel);

// Cached references to the fixed page elements, looked up once by id.
const urlInput = document.getElementById('urlInput');
const extractBtn = document.getElementById('extractBtn');
const loading = document.getElementById('loading');
const resultsSection = document.getElementById('results');
const resultsList = document.getElementById('resultsList');
const stats = document.getElementById('stats');
const copyAllBtn = document.getElementById('copyAllBtn');
const downloadAllBtn = document.getElementById('downloadAllBtn');
const denoiseCheckbox = document.getElementById('denoise');
const metadataCheckbox = document.getElementById('metadata');
588
+
589
// Toggle groups: clicking a button makes it the only "active" one in its
// group and records its value as the current format or language.
$$('.toggle-group').forEach((group) => {
    const groupName = group.dataset.name;
    const buttons = group.querySelectorAll('.toggle-btn');

    buttons.forEach((clicked) => {
        clicked.addEventListener('click', () => {
            buttons.forEach((other) => {
                other.classList.toggle('active', other === clicked);
            });

            const value = clicked.dataset.value;
            if (groupName === 'format') {
                currentFormat = value;
            }
            if (groupName === 'language') {
                currentLanguage = value;
            }
        });
    });
});
603
+
604
// Extract: triggered by the button, or by Ctrl/Cmd + Enter in the textarea.
extractBtn.addEventListener('click', handleExtract);

urlInput.addEventListener('keydown', (event) => {
    const hasModifier = event.ctrlKey || event.metaKey;
    if (!hasModifier || event.key !== 'Enter') {
        return;
    }
    event.preventDefault();
    handleExtract();
});
613
+
614
// Reads the URL list from the textarea, POSTs it to /api/transcripts and
// renders the response. Shows the loading bar while the request is pending.
async function handleExtract() {
    // The keyboard shortcut still fires while the button is disabled, so
    // guard against starting a second request on top of a pending one.
    if (extractBtn.disabled) return;

    const text = urlInput.value.trim();
    if (!text) return;

    const urls = text.split('\n').map(function (u) { return u.trim(); }).filter(function (u) { return u; });
    if (urls.length === 0) return;
    if (urls.length > 20) {
        alert('최대 20개의 URL만 입력할 수 있습니다.');
        return;
    }

    extractBtn.disabled = true;
    loading.style.display = 'block';
    resultsSection.style.display = 'none';
    resultsList.innerHTML = '';

    try {
        const response = await fetch('/api/transcripts', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                urls: urls,
                language: currentLanguage,
                denoise: denoiseCheckbox.checked,
                format: currentFormat,
            }),
        });

        // A failing server or proxy may answer with a non-JSON body.
        let data = null;
        try {
            data = await response.json();
        } catch (parseErr) {
            data = null;
        }

        // Application-level error reported by the API.
        if (data && data.error) {
            alert(data.error);
            return;
        }

        // Any other non-2xx answer (e.g. a validation error) or a body
        // without a results array must not reach renderResults; report it
        // through the common error path below instead.
        if (!response.ok || !data || !Array.isArray(data.results)) {
            throw new Error('HTTP ' + response.status);
        }

        currentResults = data;
        renderResults(data);
    } catch (err) {
        alert('요청 중 오류가 발생했습니다: ' + err.message);
    } finally {
        extractBtn.disabled = false;
        loading.style.display = 'none';
    }
}
658
+
659
// Renders the API response: the success/failure counters and one card per
// URL. Successful cards get copy and download buttons.
function renderResults(data) {
    resultsSection.style.display = 'block';

    var statsHtml = '<span class="success-count">' + data.success_count + '개 성공</span>';
    if (data.error_count > 0) {
        statsHtml += ' / <span class="error-count">' + data.error_count + '개 실패</span>';
    }
    stats.innerHTML = statsHtml;

    resultsList.innerHTML = '';

    data.results.forEach(function (result, index) {
        var card = document.createElement('div');
        card.className = 'result-card' + (result.error ? ' is-error' : '');
        // Stagger the fade-in so cards appear one after another.
        card.style.animationDelay = (index * 80) + 'ms';

        var displayId = result.video_id || result.url;

        // Format by the shape of the data that actually arrived rather than
        // the current toggle: the toggle can change while the request is in
        // flight, and a structured transcript rendered as text would show
        // up as "[object Object]".
        var contentText = '';
        if (result.error) {
            contentText = result.error;
        } else if (typeof result.transcript === 'string') {
            contentText = result.transcript;
        } else {
            contentText = JSON.stringify(result.transcript, null, 2);
        }

        var actionsHtml = '';
        if (!result.error) {
            actionsHtml = '<div class="result-card-actions">' +
                '<button class="btn btn-secondary btn-sm btn-copy" data-index="' + index + '">복사</button>' +
                '<button class="btn btn-secondary btn-sm btn-download" data-index="' + index + '">다운로드</button>' +
                '</div>';
        }

        // Everything that originates from user input or the API is escaped.
        card.innerHTML =
            '<div class="result-card-header">' +
            '<span class="result-card-id">' + escapeHtml(displayId) + '</span>' +
            actionsHtml +
            '</div>' +
            '<div class="result-card-content">' + escapeHtml(contentText) + '</div>';

        // Bind the handlers on this card's own buttons; no document-wide
        // re-query and no index parsing needed.
        var copyBtn = card.querySelector('.btn-copy');
        if (copyBtn) {
            copyBtn.addEventListener('click', function () {
                copyResult(index, copyBtn);
            });
        }

        var downloadBtn = card.querySelector('.btn-download');
        if (downloadBtn) {
            downloadBtn.addEventListener('click', function () {
                downloadResult(index);
            });
        }

        resultsList.appendChild(card);
    });
}
717
+
718
// Builds the text that is copied or downloaded for one result. When the
// "metadata" checkbox is ticked, the video id and URL are included.
function getResultText(result) {
    var includeMetadata = metadataCheckbox.checked;

    // Emit JSON when JSON is selected, and also whenever the transcript is
    // structured data: the format toggle may have been switched after the
    // results were fetched, and concatenating an array of entries into
    // text would produce "[object Object]".
    var asJson = currentFormat === 'json' || typeof result.transcript !== 'string';

    if (asJson) {
        if (includeMetadata) {
            return JSON.stringify({
                video_id: result.video_id,
                url: result.url,
                transcript: result.transcript,
            }, null, 2);
        }
        return JSON.stringify(result.transcript, null, 2);
    }

    var text = '';
    if (includeMetadata) {
        if (result.video_id) text += 'Video ID: ' + result.video_id + '\n';
        if (result.url) text += 'URL: ' + result.url + '\n';
        text += '\n';
    }
    text += result.transcript;
    return text;
}
741
+
742
// Copies one result to the clipboard and flashes the "copied" feedback on
// the button. Uses the async Clipboard API, falling back to the legacy
// copy helper when that call fails.
async function copyResult(index, btn) {
    const payload = getResultText(currentResults.results[index]);

    try {
        await navigator.clipboard.writeText(payload);
    } catch (err) {
        fallbackCopy(payload);
    }
    showCopied(btn);
}
754
+
755
// Downloads one result as a file named after its video id.
function downloadResult(index) {
    var result = currentResults.results[index];
    var text = getResultText(result);

    // Pick extension and MIME type with the same rule used for building the
    // content: JSON when selected or when the transcript is structured
    // data, so the file type cannot disagree with what is inside it.
    var asJson = currentFormat === 'json' || typeof result.transcript !== 'string';
    var ext = asJson ? 'json' : 'txt';
    var mimeType = asJson ? 'application/json' : 'text/plain';

    var filename = (result.video_id || 'transcript') + '.' + ext;
    downloadFile(filename, text, mimeType);
}
763
+
764
// Copy all: joins every successful result, separated by a "---" rule, and
// places the combined text on the clipboard.
copyAllBtn.addEventListener('click', async () => {
    if (!currentResults) {
        return;
    }

    const combined = currentResults.results
        .filter((r) => !r.error)
        .map((r) => getResultText(r))
        .join('\n\n---\n\n');

    try {
        await navigator.clipboard.writeText(combined);
    } catch (err) {
        fallbackCopy(combined);
    }
    showCopied(copyAllBtn);
});
779
+
780
// Download all: bundles every successful result into a single file.
downloadAllBtn.addEventListener('click', function () {
    if (!currentResults) return;

    var successResults = currentResults.results.filter(function (r) { return !r.error; });

    // Write JSON when JSON is selected, and also whenever any transcript is
    // structured data: the toggle may have been switched after fetching,
    // and structured entries cannot be joined as plain text.
    var asJson = currentFormat === 'json' || successResults.some(function (r) {
        return typeof r.transcript !== 'string';
    });

    if (asJson) {
        var includeMetadata = metadataCheckbox.checked;
        var data = successResults.map(function (r) {
            if (includeMetadata) {
                return { video_id: r.video_id, url: r.url, transcript: r.transcript };
            }
            return r.transcript;
        });
        downloadFile('transcripts.json', JSON.stringify(data, null, 2), 'application/json');
    } else {
        var allText = successResults.map(function (r) { return getResultText(r); }).join('\n\n---\n\n');
        downloadFile('transcripts.txt', allText, 'text/plain');
    }
});
800
+
801
// Triggers a browser download of `content` under `filename` by clicking a
// temporary link that points at an in-memory Blob.
function downloadFile(filename, content, mimeType) {
    var blob = new Blob([content], { type: mimeType + ';charset=utf-8' });
    var url = URL.createObjectURL(blob);
    var a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    // Release the object URL on a later task rather than synchronously, so
    // the browser has started reading the Blob before the URL is revoked.
    setTimeout(function () {
        URL.revokeObjectURL(url);
    }, 0);
}
812
+
813
// Shows a "copied" label on the button for 1.5 seconds, then restores the
// button's own label.
function showCopied(btn) {
    // Capture the real label only when feedback is not already showing. On
    // a second click within the 1.5 s window the button text is the
    // feedback text, and capturing it would leave the button stuck on it.
    if (!btn.classList.contains('copied')) {
        btn.dataset.originalLabel = btn.textContent;
    }

    // Restart the timer so the feedback lasts 1.5 s from the latest click.
    if (btn.dataset.copiedTimer) {
        clearTimeout(Number(btn.dataset.copiedTimer));
    }

    btn.textContent = '복사됨 ✓';
    btn.classList.add('copied');

    var timerId = setTimeout(function () {
        btn.textContent = btn.dataset.originalLabel;
        btn.classList.remove('copied');
        delete btn.dataset.originalLabel;
        delete btn.dataset.copiedTimer;
    }, 1500);
    btn.dataset.copiedTimer = String(timerId);
}
822
+
823
// Legacy clipboard path: selects the text inside an invisible textarea and
// issues the "copy" command. Returns whatever execCommand reports.
function fallbackCopy(text) {
    var textarea = document.createElement('textarea');
    textarea.value = text;
    textarea.style.position = 'fixed';
    textarea.style.opacity = '0';
    document.body.appendChild(textarea);
    try {
        textarea.select();
        return document.execCommand('copy');
    } finally {
        // Always remove the helper element, even if select/execCommand throws.
        document.body.removeChild(textarea);
    }
}
833
+
834
// Escapes text for insertion into HTML by letting the browser serialise it:
// the value is assigned as text content of a detached element and read back
// as markup.
function escapeHtml(text) {
    const holder = Object.assign(document.createElement('div'), { textContent: text });
    return holder.innerHTML;
}
839
+ })();
840
+ </script>
841
+ </body>
842
+ </html>