github-actions[bot] committed on
Commit
437df61
·
1 Parent(s): ba2e30f

Auto-deploy from GitHub: 12d87bf8a39dae48107762c05b51670ce30e1d02

Browse files
app.py DELETED
@@ -1,463 +0,0 @@
1
- from flask import Flask, request, jsonify, send_from_directory
2
- from flask_cors import CORS
3
- import sqlite3
4
- import os
5
- import uuid
6
- from datetime import datetime
7
- from werkzeug.utils import secure_filename
8
- import threading
9
- import subprocess
10
- import time
11
-
12
- app = Flask(__name__)
13
- CORS(app)
14
-
15
- UPLOAD_FOLDER = 'uploads'
16
- ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'ogg', 'm4a', 'aac', 'mp4', 'mkv', 'avi', 'mov'}
17
-
18
- os.makedirs(UPLOAD_FOLDER, exist_ok=True)
19
- os.makedirs('temp_dir', exist_ok=True)
20
-
21
- # Worker state
22
- worker_thread = None
23
- worker_running = False
24
-
25
- def init_db():
26
- conn = sqlite3.connect('audio_captions.db')
27
- c = conn.cursor()
28
- c.execute('''CREATE TABLE IF NOT EXISTS audio_files
29
- (id TEXT PRIMARY KEY,
30
- filename TEXT NOT NULL,
31
- filepath TEXT NOT NULL,
32
- status TEXT NOT NULL,
33
- caption TEXT,
34
- created_at TEXT NOT NULL,
35
- processed_at TEXT,
36
- progress INTEGER DEFAULT 0,
37
- progress_text TEXT,
38
- hide_from_ui INTEGER DEFAULT 0)'''
39
- )
40
- conn.commit()
41
- conn.close()
42
-
43
- def allowed_file(filename):
44
- return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
45
-
46
- def start_worker():
47
- """Start the worker thread if not already running"""
48
- global worker_thread, worker_running
49
-
50
- if not worker_running:
51
- worker_running = True
52
- worker_thread = threading.Thread(target=worker_loop, daemon=True)
53
- worker_thread.start()
54
- print("✅ Worker thread started")
55
-
56
- def cleanup_old_entries():
57
- """Delete database entries and audio files older than 10 days"""
58
- from datetime import timedelta
59
-
60
- try:
61
- conn = sqlite3.connect('audio_captions.db')
62
- conn.row_factory = sqlite3.Row
63
- c = conn.cursor()
64
-
65
- # Calculate cutoff date (10 days ago)
66
- cutoff_date = (datetime.now() - timedelta(days=10)).isoformat()
67
-
68
- # First, get all old entries to delete their audio files
69
- c.execute('''SELECT id, filepath FROM audio_files
70
- WHERE created_at < ?''', (cutoff_date,))
71
- old_entries = c.fetchall()
72
-
73
- if old_entries:
74
- deleted_files = 0
75
- deleted_rows = 0
76
-
77
- for entry in old_entries:
78
- # Delete the audio file if it exists
79
- filepath = entry['filepath']
80
- if filepath and os.path.exists(filepath):
81
- try:
82
- os.remove(filepath)
83
- deleted_files += 1
84
- except Exception as e:
85
- print(f"⚠️ Failed to delete old audio file {filepath}: {e}")
86
-
87
- # Delete old database entries
88
- c.execute('''DELETE FROM audio_files WHERE created_at < ?''', (cutoff_date,))
89
- deleted_rows = c.rowcount
90
- conn.commit()
91
-
92
- if deleted_rows > 0 or deleted_files > 0:
93
- print(f"🧹 Cleanup: Deleted {deleted_rows} old entries and {deleted_files} audio files (older than 10 days)")
94
-
95
- conn.close()
96
- except Exception as e:
97
- print(f"⚠️ Cleanup error: {e}")
98
-
99
- def worker_loop():
100
- """Main worker loop that processes audio files"""
101
- print("🤖 STT Worker started. Monitoring for new audio files...")
102
-
103
- CWD = "./"
104
- PYTHON_PATH = "stt-transcribe"
105
- STT_MODEL_NAME = "parakeet"
106
- POLL_INTERVAL = 3 # seconds
107
-
108
- import shlex
109
- import json
110
-
111
- while worker_running:
112
- # Run cleanup before processing each task
113
- cleanup_old_entries()
114
- try:
115
- # Get next unprocessed file
116
- conn = sqlite3.connect('audio_captions.db')
117
- conn.row_factory = sqlite3.Row
118
- c = conn.cursor()
119
- c.execute('''SELECT * FROM audio_files
120
- WHERE status = 'not_started'
121
- ORDER BY created_at ASC
122
- LIMIT 1''')
123
- row = c.fetchone()
124
- conn.close()
125
-
126
- if row:
127
- file_id = row['id']
128
- filepath = row['filepath']
129
- filename = row['filename']
130
-
131
- print(f"\n{'='*60}")
132
- print(f"🎵 Processing: {filename}")
133
- print(f"📝 ID: {file_id}")
134
- print(f"{'='*60}")
135
-
136
- # Update status to processing
137
- update_status(file_id, 'processing')
138
-
139
- try:
140
- # Run STT command
141
- print(f"🔄 Running STT on: {os.path.abspath(filepath)}")
142
- command = f"""cd {CWD} && {PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {STT_MODEL_NAME}"""
143
-
144
- import re
145
-
146
- process = subprocess.Popen(
147
- command,
148
- shell=True,
149
- executable="/bin/bash",
150
- stdout=subprocess.PIPE,
151
- stderr=subprocess.STDOUT,
152
- cwd=CWD,
153
- text=True,
154
- bufsize=1,
155
- env={
156
- **os.environ,
157
- 'PYTHONUNBUFFERED': '1',
158
- 'CUDA_LAUNCH_BLOCKING': '1',
159
- 'USE_CPU_IF_POSSIBLE': 'true'
160
- }
161
- )
162
-
163
-
164
- current_chunk = 1
165
- total_chunks = 1
166
-
167
- for line in process.stdout:
168
- print(line, end='')
169
-
170
- # Track chunk progress
171
- chunk_match = re.search(r'Processing chunk (\d+)/(\d+)', line)
172
- if chunk_match:
173
- try:
174
- current_chunk = int(chunk_match.group(1))
175
- total_chunks = int(chunk_match.group(2))
176
- except: pass
177
-
178
- # Generic percentage matcher
179
- percent_match = re.search(r'(\d+)%', line)
180
- if percent_match:
181
- try:
182
- percent = int(percent_match.group(1))
183
- if 'audio' in line.lower() or 'extract' in line.lower():
184
- update_progress(file_id, percent // 2, "Extracting audio...")
185
- elif 'transcrib' in line.lower() or 'model' in line.lower():
186
- # Calculate overall transcription progress based on chunks
187
- chunk_base = ((current_chunk - 1) / total_chunks) * 100
188
- chunk_progress = (percent / total_chunks)
189
- overall_transcription_progress = chunk_base + chunk_progress
190
-
191
- # Remap so 50-100% of the overall bar is transcription
192
- overall_progress = int(50 + (overall_transcription_progress / 2))
193
- update_progress(file_id, overall_progress, f"Transcribing... (Chunk {current_chunk}/{total_chunks})")
194
- else:
195
- update_progress(file_id, percent, "Processing...")
196
- except: pass
197
-
198
- # Stage matchers
199
- if 'extracting audio' in line.lower():
200
- update_progress(file_id, 10, "Extracting audio...")
201
- elif 'transcription started' in line.lower() and total_chunks == 1:
202
- update_progress(file_id, 50, "Transcribing started...")
203
- elif 'model loaded' in line.lower():
204
- update_progress(file_id, 20, "Model loaded...")
205
-
206
- process.wait()
207
- if process.returncode != 0:
208
- raise Exception(f"STT process failed with return code {process.returncode}")
209
-
210
- # Read transcription result
211
- output_path = f'{CWD}/temp_dir/output_transcription.json'
212
- with open(output_path, 'r') as file:
213
- result = json.loads(file.read().strip())
214
-
215
- # Extract caption text
216
- caption = result.get('text', '') or result.get('transcription', '') or str(result)
217
-
218
- print(f"✅ Successfully processed: {filename}")
219
- print(f"📄 Caption preview: {caption[:100]}...")
220
-
221
- # Update database with success
222
- update_status(file_id, 'completed', caption=json.dumps(result))
223
-
224
- # Delete the audio file after successful processing
225
- if os.path.exists(filepath):
226
- os.remove(filepath)
227
- print(f"🗑️ Deleted audio file: {filepath}")
228
-
229
- except Exception as e:
230
- print(f"❌ Failed to process: {filename}")
231
- print(f"Error: {str(e)}")
232
- update_status(file_id, 'failed', error=str(e))
233
-
234
- # Don't delete file on failure (for debugging)
235
- # Optionally delete after some time or manual review
236
-
237
- else:
238
- # No files to process, sleep for a bit
239
- time.sleep(POLL_INTERVAL)
240
-
241
- except Exception as e:
242
- print(f"⚠️ Worker error: {str(e)}")
243
- time.sleep(POLL_INTERVAL)
244
-
245
- def update_progress(file_id, progress, progress_text=None):
246
- """Update the progress of a file in the database"""
247
- conn = sqlite3.connect('audio_captions.db')
248
- c = conn.cursor()
249
- c.execute('UPDATE audio_files SET progress = ?, progress_text = ? WHERE id = ?',
250
- (progress, progress_text, file_id))
251
- conn.commit()
252
- conn.close()
253
-
254
- def update_status(file_id, status, caption=None, error=None):
255
- """Update the status of a file in the database"""
256
- conn = sqlite3.connect('audio_captions.db')
257
- c = conn.cursor()
258
-
259
- if status == 'completed':
260
- c.execute('''UPDATE audio_files
261
- SET status = ?, caption = ?, processed_at = ?, progress = 100, progress_text = 'Completed'
262
- WHERE id = ?''',
263
- (status, caption, datetime.now().isoformat(), file_id))
264
- elif status == 'failed':
265
- c.execute('''UPDATE audio_files
266
- SET status = ?, caption = ?, processed_at = ?, progress_text = 'Failed'
267
- WHERE id = ?''',
268
- (status, f"Error: {error}", datetime.now().isoformat(), file_id))
269
- else:
270
- c.execute('UPDATE audio_files SET status = ? WHERE id = ?', (status, file_id))
271
-
272
- conn.commit()
273
- conn.close()
274
-
275
- @app.route('/')
276
- def index():
277
- return send_from_directory('.', 'index.html')
278
-
279
- @app.route('/api/upload', methods=['POST'])
280
- def upload_audio():
281
- if 'audio' not in request.files:
282
- return jsonify({'error': 'No audio file provided'}), 400
283
-
284
- file = request.files['audio']
285
-
286
- if file.filename == '':
287
- return jsonify({'error': 'No file selected'}), 400
288
-
289
- if not allowed_file(file.filename):
290
- return jsonify({'error': 'Invalid file type'}), 400
291
-
292
- file_id = str(uuid.uuid4())
293
- filename = secure_filename(file.filename)
294
- filepath = os.path.join(UPLOAD_FOLDER, f"{file_id}_{filename}")
295
- file.save(filepath)
296
-
297
- hide_from_ui_str = request.form.get('hide_from_ui', '')
298
- hide_from_ui_val = 1 if str(hide_from_ui_str).lower() in ['true', '1'] else 0
299
-
300
- conn = sqlite3.connect('audio_captions.db')
301
- c = conn.cursor()
302
- c.execute('''INSERT INTO audio_files
303
- (id, filename, filepath, status, created_at, hide_from_ui)
304
- VALUES (?, ?, ?, ?, ?, ?)''',
305
- (file_id, filename, filepath, 'not_started', datetime.now().isoformat(), hide_from_ui_val))
306
- conn.commit()
307
- conn.close()
308
-
309
- # Start worker on first upload
310
- start_worker()
311
-
312
- return jsonify({
313
- 'id': file_id,
314
- 'filename': filename,
315
- 'status': 'not_started',
316
- 'message': 'File uploaded successfully'
317
- }), 201
318
-
319
- def get_average_processing_time(cursor):
320
- """Calculate average processing time from completed files in seconds"""
321
- cursor.execute('''SELECT created_at, processed_at FROM audio_files
322
- WHERE status = 'completed' AND processed_at IS NOT NULL
323
- ORDER BY processed_at DESC LIMIT 20''')
324
- completed_rows = cursor.fetchall()
325
-
326
- if not completed_rows:
327
- return 30.0 # Default estimate: 30 seconds per file
328
-
329
- total_seconds = 0
330
- count = 0
331
- for r in completed_rows:
332
- try:
333
- created = datetime.fromisoformat(r['created_at'])
334
- processed = datetime.fromisoformat(r['processed_at'])
335
- duration = (processed - created).total_seconds()
336
- if duration > 0:
337
- total_seconds += duration
338
- count += 1
339
- except:
340
- continue
341
-
342
- return total_seconds / count if count > 0 else 30.0
343
-
344
- @app.route('/api/files', methods=['GET'])
345
- def get_files():
346
- conn = sqlite3.connect('audio_captions.db')
347
- conn.row_factory = sqlite3.Row
348
- c = conn.cursor()
349
-
350
- # Get average processing time
351
- avg_time = get_average_processing_time(c)
352
-
353
- # Get queue (files waiting to be processed, ordered by creation time)
354
- c.execute('''SELECT id FROM audio_files
355
- WHERE status = 'not_started'
356
- ORDER BY created_at ASC''')
357
- queue_ids = [row['id'] for row in c.fetchall()]
358
-
359
- # Check if there's a file currently processing
360
- c.execute('''SELECT COUNT(*) as count FROM audio_files WHERE status = 'processing' ''')
361
- processing_count = c.fetchone()['count']
362
-
363
- c.execute('SELECT * FROM audio_files WHERE hide_from_ui = 0 OR hide_from_ui IS NULL ORDER BY created_at DESC')
364
- rows = c.fetchall()
365
- conn.close()
366
-
367
- files = []
368
- for row in rows:
369
- # Calculate queue position (1-based) for files in queue
370
- queue_position = None
371
- estimated_start_seconds = None
372
-
373
- if row['status'] == 'not_started' and row['id'] in queue_ids:
374
- queue_position = queue_ids.index(row['id']) + 1
375
- # Estimate = (files ahead + currently processing) * avg time
376
- files_ahead = queue_position - 1 + processing_count
377
- estimated_start_seconds = round(files_ahead * avg_time)
378
-
379
- files.append({
380
- 'id': row['id'],
381
- 'filename': row['filename'],
382
- 'status': row['status'],
383
- 'caption': "HIDDEN_IN_LIST_VIEW", # Don't send full captions in list view
384
- 'created_at': row['created_at'],
385
- 'processed_at': row['processed_at'],
386
- 'progress': row['progress'] or 0,
387
- 'progress_text': row['progress_text'],
388
- 'queue_position': queue_position,
389
- 'estimated_start_seconds': estimated_start_seconds
390
- })
391
-
392
- return jsonify(files)
393
-
394
- @app.route('/api/files/<file_id>', methods=['GET'])
395
- def get_file(file_id):
396
- conn = sqlite3.connect('audio_captions.db')
397
- conn.row_factory = sqlite3.Row
398
- c = conn.cursor()
399
- c.execute('SELECT * FROM audio_files WHERE id = ?', (file_id,))
400
- row = c.fetchone()
401
-
402
- if row is None:
403
- conn.close()
404
- return jsonify({'error': 'File not found'}), 404
405
-
406
- # Calculate queue position and estimated time if file is waiting
407
- queue_position = None
408
- estimated_start_seconds = None
409
-
410
- if row['status'] == 'not_started':
411
- # Get average processing time
412
- avg_time = get_average_processing_time(c)
413
-
414
- # Count files ahead in queue
415
- c.execute('''SELECT COUNT(*) as position FROM audio_files
416
- WHERE status = 'not_started' AND created_at < ?''',
417
- (row['created_at'],))
418
- position_row = c.fetchone()
419
- queue_position = position_row['position'] + 1 # 1-based position
420
-
421
- # Check if there's a file currently processing
422
- c.execute('''SELECT COUNT(*) as count FROM audio_files WHERE status = 'processing' ''')
423
- processing_count = c.fetchone()['count']
424
-
425
- # Estimate = (files ahead + currently processing) * avg time
426
- files_ahead = queue_position - 1 + processing_count
427
- estimated_start_seconds = round(files_ahead * avg_time)
428
-
429
- conn.close()
430
-
431
- return jsonify({
432
- 'id': row['id'],
433
- 'filename': row['filename'],
434
- 'status': row['status'],
435
- 'caption': row['caption'],
436
- 'created_at': row['created_at'],
437
- 'processed_at': row['processed_at'],
438
- 'progress': row['progress'] or 0,
439
- 'progress_text': row['progress_text'],
440
- 'queue_position': queue_position,
441
- 'estimated_start_seconds': estimated_start_seconds
442
- })
443
-
444
- @app.route('/health', methods=['GET'])
445
- def health():
446
- return jsonify({
447
- 'status': 'healthy',
448
- 'service': 'audio-caption-generator',
449
- 'worker_running': worker_running
450
- })
451
-
452
- if __name__ == '__main__':
453
- init_db()
454
- print("\n" + "="*60)
455
- print("🚀 Audio Caption Generator API Server")
456
- print("="*60)
457
- print("📌 Worker will start automatically on first upload")
458
- print("🗑️ Audio files will be deleted after successful processing")
459
- print("="*60 + "\n")
460
-
461
- # Use PORT environment variable for Hugging Face compatibility
462
- port = int(os.environ.get('PORT', 7860))
463
- app.run(debug=False, host='0.0.0.0', port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/routes.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import FileResponse, JSONResponse
3
+ import os
4
+ import uuid
5
+ import aiofiles
6
+ from app.core.config import settings
7
+ from custom_logger import logger_config as logger
8
+ from app.db import crud
9
+ from app.services.worker import start_worker, is_worker_running
10
+
11
+ router = APIRouter()
12
+
13
def allowed_file(filename):
    """Return True when the text after the last dot of *filename* is an allowed extension.

    The comparison is case-insensitive and uses ``settings.ALLOWED_EXTENSIONS``.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in settings.ALLOWED_EXTENSIONS
15
+
16
@router.get("/")
async def index():
    """Serve ``index.html`` (a relative path, so it is resolved against the process working directory)."""
    landing_page = FileResponse('index.html')
    return landing_page
19
+
20
@router.post("/api/tasks/upload")
async def upload_task(audio: UploadFile = File(...), hide_from_ui: str = Form("")):
    """Store an uploaded media file, register it as a task and make sure the worker runs.

    Args:
        audio: Multipart file field containing the media to transcribe.
        hide_from_ui: Form flag; "true" or "1" (case-insensitive) is stored as 1
            in the task's ``hide_from_ui`` column, anything else as 0.

    Returns:
        A 201 JSON response with the task id, the stored filename, the initial
        status and a confirmation message.

    Raises:
        HTTPException: 400 when no usable filename is supplied or the extension
            is not allowed; 500 when the file cannot be written to disk.
    """
    # The filename is client-controlled: keep only its last path component
    # (treating backslashes as separators too) so it cannot address a location
    # outside the upload folder.
    filename = os.path.basename((audio.filename or "").replace("\\", "/"))
    if not filename:
        raise HTTPException(status_code=400, detail="No file selected")

    if not allowed_file(filename):
        raise HTTPException(status_code=400, detail="Invalid file type")

    task_id = str(uuid.uuid4())
    filepath = os.path.join(settings.UPLOAD_FOLDER, f"{task_id}_{filename}")

    try:
        async with aiofiles.open(filepath, 'wb') as out_file:
            # Copy in 1 MiB chunks so a large upload is never held in memory whole.
            while True:
                chunk = await audio.read(1024 * 1024)
                if not chunk:
                    break
                await out_file.write(chunk)
        logger.info(f"File uploaded successfully: {filename} -> {filepath}")
    except Exception as e:
        logger.error(f"Error saving uploaded file {filename}: {e}")
        # Best-effort removal of a partially written file; the original error
        # is what gets reported to the client.
        try:
            os.remove(filepath)
        except OSError:
            pass
        raise HTTPException(status_code=500, detail="Could not save file") from e

    hide_from_ui_val = 1 if hide_from_ui.lower() in ['true', '1'] else 0

    await crud.insert_task(task_id, filename, filepath, 'not_started', hide_from_ui_val)

    # The worker is started lazily; start_worker() does nothing if it is already running.
    await start_worker()

    return JSONResponse(status_code=201, content={
        'id': task_id,
        'filename': filename,
        'status': 'not_started',
        'message': 'File uploaded successfully'
    })
53
+
54
@router.get("/api/tasks")
async def get_tasks():
    """List the tasks returned by ``crud.get_all_tasks`` with queue information.

    For every task still in ``not_started`` status the response carries its
    1-based position in the waiting queue and an estimated number of seconds
    until processing starts, computed as
    ``(tasks ahead in queue + tasks currently processing) * average time``.
    The ``result`` field is always the placeholder ``"HIDDEN_IN_LIST_VIEW"``;
    the full result is only served by the single-task endpoint.

    Returns:
        A list of task dictionaries, in the order delivered by the CRUD layer.
    """
    rows, queue_ids, processing_count, avg_time = await crud.get_all_tasks()

    # Build the id -> 1-based position map once instead of scanning the queue
    # list for every row.
    queue_positions = {
        queued_id: position
        for position, queued_id in enumerate(queue_ids, start=1)
    }

    tasks = []
    for row in rows:
        queue_position = None
        estimated_start_seconds = None

        if row['status'] == 'not_started':
            queue_position = queue_positions.get(row['id'])
            if queue_position is not None:
                tasks_ahead = queue_position - 1 + processing_count
                estimated_start_seconds = round(tasks_ahead * avg_time)

        tasks.append({
            'id': row['id'],
            'filename': row['filename'],
            'status': row['status'],
            'result': "HIDDEN_IN_LIST_VIEW",
            'created_at': row['created_at'],
            'processed_at': row['processed_at'],
            'progress': row['progress'] or 0,
            'progress_text': row['progress_text'],
            'queue_position': queue_position,
            'estimated_start_seconds': estimated_start_seconds
        })

    return tasks
82
+
83
@router.get("/api/tasks/{task_id}")
async def get_task(task_id: str):
    """Return one task, including its stored result, queue position and start estimate.

    Raises:
        HTTPException: 404 when ``crud.get_task_by_id`` finds no such task.
    """
    found = await crud.get_task_by_id(task_id)
    if not found:
        raise HTTPException(status_code=404, detail="Task not found")

    row, queue_position, estimated_start_seconds = found

    # Columns copied straight from the database row, in response order.
    copied_columns = ('id', 'filename', 'status', 'result', 'created_at', 'processed_at')
    payload = {column: row[column] for column in copied_columns}
    payload['progress'] = row['progress'] or 0  # NULL progress is reported as 0
    payload['progress_text'] = row['progress_text']
    payload['queue_position'] = queue_position
    payload['estimated_start_seconds'] = estimated_start_seconds
    return payload
103
+
104
@router.get("/health")
async def health():
    """Return a fixed health payload plus the value reported by ``is_worker_running()``."""
    report = dict(status='healthy', service='stt-backend')
    report['worker_running'] = is_worker_running()
    return report
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
class Config:
    """Static application settings; a single shared instance is exposed as ``settings``."""

    # Port number read from the PORT environment variable (7860 when unset).
    PORT = int(os.environ.get('PORT', 7860))
    # Directory that receives uploaded media files (relative path).
    UPLOAD_FOLDER = 'uploads'
    # Scratch directory created at import time (relative path).
    TEMP_DIR = 'temp_dir'
    # SQLite database file.
    DATABASE_FILE = 'audio_captions.db'
    # Lower-case extensions accepted on upload. Frozen so the class-level
    # value shared by every importer cannot be mutated by accident.
    ALLOWED_EXTENSIONS = frozenset(
        {'wav', 'mp3', 'flac', 'ogg', 'm4a', 'aac', 'mp4', 'mkv', 'avi', 'mov'}
    )

    # Working directory for the transcription subprocess.
    CWD = "./"
    # Executable invoked to transcribe a file.
    PYTHON_PATH = "stt-transcribe"
    # Value passed to the executable's --model option.
    STT_MODEL_NAME = "parakeet"
    # Polling interval in seconds (presumably the worker's idle sleep; verify against the worker loop).
    POLL_INTERVAL = 3


settings = Config()

# Make sure the working directories exist as soon as the settings are imported.
os.makedirs(settings.UPLOAD_FOLDER, exist_ok=True)
os.makedirs(settings.TEMP_DIR, exist_ok=True)
app/db/__init__.py ADDED
File without changes
app/db/crud.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import os
3
+ from datetime import datetime, timedelta
4
+ from app.core.config import settings
5
+ from custom_logger import logger_config as logger
6
+
7
async def insert_task(task_id: str, filename: str, filepath: str, status: str, hide_from_ui: int):
    """Insert a new row into ``tasks``, stamping ``created_at`` with the current local time."""
    insert_sql = '''INSERT INTO tasks
        (id, filename, filepath, status, created_at, hide_from_ui)
        VALUES (?, ?, ?, ?, ?, ?)'''
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        values = (task_id, filename, filepath, status, datetime.now().isoformat(), hide_from_ui)
        await db.execute(insert_sql, values)
        await db.commit()
        logger.debug(f"Inserted task {filename} (ID: {task_id}) into database.")
15
+
16
async def update_status(task_id: str, status: str, result: str = None, error: str = None):
    """Set a task's status and, for terminal states, its result and completion time.

    Args:
        task_id: Primary key of the task row.
        status: New status. ``'completed'`` also stores *result*, sets progress
            to 100 and the progress text to 'Completed'; ``'failed'`` stores
            ``"Error: <error>"`` as the result and the progress text 'Failed';
            any other value only updates the status column.
        result: Result text stored for a completed task.
        error: Error description stored for a failed task.
    """
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        if status == 'completed':
            statement = '''UPDATE tasks
                SET status = ?, result = ?, processed_at = ?, progress = 100, progress_text = 'Completed'
                WHERE id = ?'''
            params = (status, result, datetime.now().isoformat(), task_id)
        elif status == 'failed':
            statement = '''UPDATE tasks
                SET status = ?, result = ?, processed_at = ?, progress_text = 'Failed'
                WHERE id = ?'''
            params = (status, f"Error: {error}", datetime.now().isoformat(), task_id)
        else:
            statement = 'UPDATE tasks SET status = ? WHERE id = ?'
            params = (status, task_id)

        await db.execute(statement, params)

        # Log at a level matching the transition, before the commit.
        if status == 'completed':
            logger.info(f"Task ID {task_id} marked as completed.")
        elif status == 'failed':
            logger.error(f"Task ID {task_id} marked as failed. Error: {error}")
        else:
            logger.debug(f"Task ID {task_id} status updated to {status}.")

        await db.commit()
34
+
35
async def update_progress(task_id: str, progress: int, progress_text: str = None):
    """Store the progress value and its accompanying text for one task."""
    progress_sql = 'UPDATE tasks SET progress = ?, progress_text = ? WHERE id = ?'
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(progress_sql, (progress, progress_text, task_id))
        await db.commit()
        logger.debug(f"Task ID {task_id} progress updated to {progress}% ({progress_text}).")
41
+
42
async def get_next_not_started():
    """Return the ``not_started`` task with the smallest ``created_at``, or None when none is waiting."""
    oldest_waiting_sql = '''SELECT * FROM tasks
        WHERE status = 'not_started'
        ORDER BY created_at ASC
        LIMIT 1'''
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute(oldest_waiting_sql) as cursor:
            oldest = await cursor.fetchone()
    return oldest
50
+
51
async def cleanup_old_entries():
    """Delete tasks created more than 10 days ago together with their stored files.

    The function is best-effort: a file that cannot be removed is logged as a
    warning and skipped, and any other failure is logged as an error instead
    of being raised, so a cleanup problem never interrupts the caller.
    """
    try:
        async with aiosqlite.connect(settings.DATABASE_FILE) as db:
            db.row_factory = aiosqlite.Row
            # created_at holds datetime.isoformat() strings, which sort
            # chronologically, so a plain string comparison in SQL is enough.
            cutoff_date = (datetime.now() - timedelta(days=10)).isoformat()

            async with db.execute('''SELECT id, filepath FROM tasks
                WHERE created_at < ?''', (cutoff_date,)) as cursor:
                old_entries = await cursor.fetchall()

            if not old_entries:
                return

            deleted_files = 0
            for entry in old_entries:
                filepath = entry['filepath']
                if not filepath:
                    continue
                # Try the removal directly rather than checking existence
                # first: the file may vanish between the check and the call.
                try:
                    os.remove(filepath)
                except FileNotFoundError:
                    continue  # already gone, e.g. removed after processing
                except (OSError, ValueError) as e:
                    logger.warning(f"Failed to delete old file {filepath}: {e}")
                else:
                    deleted_files += 1

            async with db.execute('''DELETE FROM tasks WHERE created_at < ?''', (cutoff_date,)) as cursor:
                deleted_rows = cursor.rowcount
            await db.commit()

            if deleted_rows > 0 or deleted_files > 0:
                logger.info(f"Cleanup: Deleted {deleted_rows} old entries and {deleted_files} files (older than 10 days)")
    except Exception as e:
        logger.error(f"Cleanup error: {e}")
85
+
86
async def get_average_processing_time():
    """Return the average seconds between creation and completion of recent tasks.

    The 20 most recently completed tasks are sampled. Each duration is
    ``processed_at - created_at``, so it covers the whole time from upload to
    completion. Rows with unparsable timestamps or a non-positive duration are
    ignored.

    Returns:
        The mean duration in seconds, or 30.0 when no usable sample exists.
    """
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute('''SELECT created_at, processed_at FROM tasks
            WHERE status = 'completed' AND processed_at IS NOT NULL
            ORDER BY processed_at DESC LIMIT 20''') as cursor:
            completed_rows = await cursor.fetchall()

    durations = []
    for r in completed_rows:
        try:
            created = datetime.fromisoformat(r['created_at'])
            processed = datetime.fromisoformat(r['processed_at'])
            duration = (processed - created).total_seconds()
        except (TypeError, ValueError):
            # Malformed or non-text timestamp, or naive/aware mismatch: skip the row.
            continue
        if duration > 0:
            durations.append(duration)

    return sum(durations) / len(durations) if durations else 30.0
111
+
112
async def get_all_tasks():
    """Collect the data needed to render the task list.

    Returns:
        A 4-tuple ``(rows, queue_ids, processing_count, avg_time)``:
        the tasks whose ``hide_from_ui`` is 0 or NULL (newest first), the ids of
        all ``not_started`` tasks (oldest first), the number of tasks in
        ``processing`` status, and the average processing time in seconds.
    """
    waiting_sql = '''SELECT id FROM tasks
        WHERE status = 'not_started'
        ORDER BY created_at ASC'''
    processing_sql = '''SELECT COUNT(*) as count FROM tasks WHERE status = 'processing' '''
    visible_sql = 'SELECT * FROM tasks WHERE hide_from_ui = 0 OR hide_from_ui IS NULL ORDER BY created_at DESC'

    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row

        avg_time = await get_average_processing_time()

        async with db.execute(waiting_sql) as cursor:
            waiting_rows = await cursor.fetchall()
        queue_ids = [waiting['id'] for waiting in waiting_rows]

        async with db.execute(processing_sql) as cursor:
            counted = await cursor.fetchone()
        processing_count = counted['count']

        async with db.execute(visible_sql) as cursor:
            rows = await cursor.fetchall()

    return rows, queue_ids, processing_count, avg_time
131
+
132
async def get_task_by_id(task_id: str):
    """Fetch one task plus its queue position and estimated wait.

    Returns:
        None when no task has this id; otherwise a tuple
        ``(row, queue_position, estimated_start_seconds)``. The last two are
        None unless the task is still ``not_started``; then the position is
        1-based among waiting tasks ordered by ``created_at`` and the estimate
        is ``(tasks ahead + tasks processing) * average time``, rounded.
    """
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute('SELECT * FROM tasks WHERE id = ?', (task_id,)) as cursor:
            row = await cursor.fetchone()

        if not row:
            return None

        # Only waiting tasks carry queue information.
        if row['status'] != 'not_started':
            return row, None, None

        avg_time = await get_average_processing_time()

        async with db.execute('''SELECT COUNT(*) as position FROM tasks
            WHERE status = 'not_started' AND created_at < ?''',
            (row['created_at'],)) as cursor:
            ahead_row = await cursor.fetchone()
        queue_position = ahead_row['position'] + 1

        async with db.execute('''SELECT COUNT(*) as count FROM tasks WHERE status = 'processing' ''') as cursor:
            active_row = await cursor.fetchone()
        processing_count = active_row['count']

        tasks_ahead = queue_position - 1 + processing_count
        estimated_start_seconds = round(tasks_ahead * avg_time)

        return row, queue_position, estimated_start_seconds
app/db/database.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ from app.core.config import settings
3
+ from custom_logger import logger_config as logger
4
+
5
async def init_db():
    """Create the ``tasks`` table in the configured SQLite file if it does not exist yet."""
    logger.info(f"Initializing database at {settings.DATABASE_FILE}")
    create_tasks_table = '''CREATE TABLE IF NOT EXISTS tasks
        (id TEXT PRIMARY KEY,
        filename TEXT NOT NULL,
        filepath TEXT NOT NULL,
        status TEXT NOT NULL,
        result TEXT,
        created_at TEXT NOT NULL,
        processed_at TEXT,
        progress INTEGER DEFAULT 0,
        progress_text TEXT,
        hide_from_ui INTEGER DEFAULT 0)'''
    async with aiosqlite.connect(settings.DATABASE_FILE) as db:
        await db.execute(create_tasks_table)
        await db.commit()
    logger.info("Database initialized successfully.")
app/main.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from app.api.routes import router
5
+ from app.db.database import init_db
6
+ from custom_logger import logger_config as logger
7
+
8
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: log a startup banner, initialise the database, log on shutdown."""
    rule = "="*60
    startup_banner = (
        rule,
        "STT Backend API Server Starting Up",
        rule,
        "Worker will start automatically on first upload",
        "Audio files will be deleted after successful processing",
        rule,
    )
    for banner_line in startup_banner:
        logger.info(banner_line)

    await init_db()
    yield  # the application serves requests while suspended here
    logger.info("STT Backend API Server Shutting Down")
20
+
21
# ASGI application object; startup and shutdown work is handled by `lifespan`.
app = FastAPI(title="STT Backend API", version="2.0.0", lifespan=lifespan)

# CORS: the API is open to every origin. Credentials are disabled because
# FastAPI's CORS documentation states that wildcard origins, methods and
# headers must not be combined with allow_credentials=True.
# NOTE(review): no route visible here relies on cookies or auth headers;
# if one is added, list the trusted origins explicitly before re-enabling.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register all HTTP routes.
app.include_router(router)
app/services/__init__.py ADDED
File without changes
app/services/worker.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import json
4
+ import shlex
5
+ import re
6
+ from app.core.config import settings
7
+ from custom_logger import logger_config as logger
8
+ from app.db import crud
9
+
10
# Module-level worker state.
# worker_task: the asyncio task running the worker loop, or None before first start.
# worker_running: flag set when the worker is started; the loop runs while it is true.
worker_task = None
worker_running = False


def is_worker_running():
    """Return True only while the worker is flagged as running and its task is still alive.

    The flag alone is not sufficient: if the worker task has finished (crashed
    or was cancelled) the flag would otherwise keep reporting a running worker.
    """
    return bool(worker_running and worker_task is not None and not worker_task.done())
15
+
16
async def start_worker():
    """Start the background worker task unless a live one already exists.

    Must be called from a running event loop because it uses
    ``asyncio.create_task``. A worker whose task has already finished (for
    example after an unhandled error or cancellation) is replaced by a new
    task instead of being treated as still running.
    """
    global worker_task, worker_running

    logger.info(f"start_worker called: worker_running={worker_running}")

    task_alive = worker_task is not None and not worker_task.done()
    if worker_running and task_alive:
        logger.info("Worker already running")
        return

    if worker_running:
        # The flag was left set by a worker task that is no longer alive.
        logger.warning("Worker flag was set but its task has finished; restarting worker")

    worker_running = True
    # Keep a reference to the task so it is not garbage-collected while running.
    worker_task = asyncio.create_task(worker_loop())
    logger.info("Worker task started")
27
+
28
# Matches chunk announcements such as "Processing chunk 2/5".
_CHUNK_PATTERN = re.compile(r'Processing chunk (\d+)/(\d+)')
# Matches the first integer percentage on a line, e.g. "42%".
_PERCENT_PATTERN = re.compile(r'(\d+)%')

# (lower-case marker, progress value, status text), checked in order; the
# first marker found in an output line decides the reported stage.
_STAGE_MARKERS = (
    ('initializing nemo asr', 10, "Initializing engine..."),
    ('extracting audio', 15, "Extracting audio..."),
    ('model loaded', 25, "Model loaded..."),
    ('processing audio duration', 35, "Analyzing audio..."),
    ('transcription started', 50, "Transcribing started..."),
    ('transcription completed successfully', 90, "Transcription finished."),
    ('json transcription saved', 95, "Saving data..."),
)


async def _report_percent(task_id, lowered, percent, current_chunk, total_chunks):
    """Convert a percentage printed by the STT process into overall progress."""
    if 'audio' in lowered or 'extract' in lowered:
        # Audio extraction occupies the first half of the bar.
        await crud.update_progress(task_id, percent // 2, "Extracting audio...")
    elif 'transcrib' in lowered or 'model' in lowered:
        # Position inside the transcription phase, weighted by chunk index.
        chunk_base = ((current_chunk - 1) / total_chunks) * 100
        chunk_progress = percent / total_chunks
        overall_transcription_progress = chunk_base + chunk_progress

        # Remap so 50-100% of the overall bar is transcription.
        overall_progress = int(50 + (overall_transcription_progress / 2))
        await crud.update_progress(task_id, overall_progress, f"Transcribing... (Chunk {current_chunk}/{total_chunks})")
    else:
        await crud.update_progress(task_id, percent, "Processing...")


async def _report_stage(task_id, lowered, total_chunks):
    """Report the first stage marker found in a lower-cased output line."""
    for marker, progress, text in _STAGE_MARKERS:
        if marker not in lowered:
            continue
        # "transcription started" only counts for single-chunk jobs; for
        # multi-chunk jobs the later markers are still checked.
        if marker == 'transcription started' and total_chunks != 1:
            continue
        await crud.update_progress(task_id, progress, text)
        return


async def _follow_stt_output(task_id, stdout):
    """Read the STT process output line by line and mirror it as progress."""
    current_chunk = 1
    total_chunks = 1

    while True:
        line = await stdout.readline()
        if not line:
            break  # EOF: the process closed its output stream.

        line_str = line.decode('utf-8', errors='replace').strip()
        if not line_str:
            continue

        logger.info(f"[STT] {line_str}")
        lowered = line_str.lower()

        # Track chunk progress
        chunk_match = _CHUNK_PATTERN.search(line_str)
        if chunk_match:
            try:
                current_chunk = int(chunk_match.group(1))
                total_chunks = int(chunk_match.group(2))
            except ValueError:
                pass

        # Generic percentage matcher. Percentage updates are best-effort, so
        # ordinary errors are logged and ignored. `except Exception` (unlike a
        # bare `except`) lets asyncio.CancelledError propagate, so cancelling
        # the worker is not silently swallowed here.
        percent_match = _PERCENT_PATTERN.search(line_str)
        if percent_match:
            try:
                percent = int(percent_match.group(1))
                await _report_percent(task_id, lowered, percent, current_chunk, total_chunks)
            except Exception as e:
                logger.debug(f"Skipped progress update: {str(e)}")

        # Stage matchers
        await _report_stage(task_id, lowered, total_chunks)


async def _run_stt(task_id, filepath):
    """Run the STT command for one file, raising if it exits non-zero."""
    command = f"cd {settings.CWD} && {settings.PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {settings.STT_MODEL_NAME}"

    logger.debug(f"Executing command: {command}")

    process = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,  # merge stderr into the stream we parse
        cwd=settings.CWD,
        env={
            **os.environ,
            'PYTHONUNBUFFERED': '1',
            'CUDA_LAUNCH_BLOCKING': '1',
            'USE_CPU_IF_POSSIBLE': 'true'
        }
    )

    try:
        await _follow_stt_output(task_id, process.stdout)
        await process.wait()
    finally:
        # If output handling failed or the task was cancelled, do not leave the
        # process running next to the job that will be picked up afterwards.
        # NOTE(review): this signals the shell started by
        # create_subprocess_shell; confirm the STT child exits with it.
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass  # already gone
            await process.wait()

    if process.returncode != 0:
        raise RuntimeError(f"STT process failed with return code {process.returncode}")


async def _process_task(row):
    """Transcribe one queued file and record the outcome through `crud`."""
    task_id = row['id']
    filepath = row['filepath']
    filename = row['filename']

    logger.info(f"\n{'='*60}\nProcessing: {filename}\nID: {task_id}\n{'='*60}")

    await crud.update_status(task_id, 'processing')

    try:
        await crud.update_progress(task_id, 5, "Starting STT...")

        await _run_stt(task_id, filepath)

        await crud.update_progress(task_id, 98, "Reading results...")

        output_path = os.path.join(settings.CWD, settings.TEMP_DIR, 'output_transcription.json')
        with open(output_path, 'r', encoding='utf-8') as file:
            result = json.load(file)

        # Extract result text (caption); fall back to the raw payload when the
        # JSON is not an object or carries neither key.
        if isinstance(result, dict):
            result_data = result.get('text', '') or result.get('transcription', '') or str(result)
        else:
            result_data = str(result)

        logger.success(f"Successfully processed: {filename}")
        logger.info(f"Text preview: {str(result_data)[:100]}...")

        await crud.update_status(task_id, 'completed', result=json.dumps(result))

    except Exception as e:
        logger.error(f"Failed to process {filename}: {str(e)}")
        await crud.update_status(task_id, 'failed', error=str(e))
        return

    # The result is already stored; a failed delete must not turn a completed
    # task into a failed one, so it is only logged.
    try:
        os.remove(filepath)
        logger.debug(f"Deleted audio file: {filepath}")
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.error(f"Could not delete audio file {filepath}: {str(e)}")


async def worker_loop():
    """Poll for queued audio files and transcribe them one at a time.

    Runs while the module-level `worker_running` flag is true. Each iteration
    asks `crud` to clean up old entries, then fetches the next not-started
    row: if there is one it is processed, otherwise the loop sleeps for
    `settings.POLL_INTERVAL`. Ordinary errors are logged and the loop carries
    on; cancellation of the task is not intercepted.
    """
    logger.info("STT Worker started. Monitoring for new audio files...")

    while worker_running:
        logger.debug("Worker loop iteration, checking for files...")

        # Housekeeping must not be able to stop the worker: a failure here is
        # logged and the queue is still served.
        try:
            await crud.cleanup_old_entries()
        except Exception as e:
            logger.error(f"Cleanup error: {str(e)}")

        try:
            row = await crud.get_next_not_started()

            if row:
                await _process_task(row)
            else:
                await asyncio.sleep(settings.POLL_INTERVAL)

        except Exception as e:
            logger.error(f"Worker error: {str(e)}")
            await asyncio.sleep(settings.POLL_INTERVAL)
audio_captions.db ADDED
Binary file (69.6 kB). View file
 
index.html CHANGED
@@ -2,927 +2,816 @@
2
  <html lang="en">
3
 
4
  <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Audio Caption Generator</title>
8
- <style>
9
- * {
10
- margin: 0;
11
- padding: 0;
12
- box-sizing: border-box;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
 
14
 
15
- :root {
16
- --bg: #0a0e27;
17
- --surface: #141b3d;
18
- --primary: #00ff88;
19
- --secondary: #ff00ff;
20
- --accent: #00d4ff;
21
- --error: #ff1744;
22
- --text: #ffffff;
23
- --border: 4px;
24
- }
 
 
25
 
 
 
26
  body {
27
- font-family: 'Space Grotesk', 'Courier New', monospace;
28
- background: var(--bg);
29
- color: var(--text);
30
- min-height: 100vh;
31
- overflow-x: hidden;
32
- position: relative;
33
- }
34
-
35
- body::before {
36
- content: '';
37
- position: fixed;
38
- top: 0;
39
- left: 0;
40
- width: 100%;
41
- height: 100%;
42
- background:
43
- radial-gradient(circle at 20% 50%, rgba(0, 255, 136, 0.1) 0%, transparent 50%),
44
- radial-gradient(circle at 80% 80%, rgba(255, 0, 255, 0.1) 0%, transparent 50%),
45
- radial-gradient(circle at 40% 20%, rgba(0, 212, 255, 0.1) 0%, transparent 50%);
46
- pointer-events: none;
47
- z-index: 0;
48
- }
49
-
50
- .container {
51
- max-width: 1400px;
52
- margin: 0 auto;
53
- padding: 2rem;
54
- position: relative;
55
- z-index: 1;
56
- }
57
-
58
- header {
59
- text-align: center;
60
- margin-bottom: 3rem;
61
- animation: slideDown 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55);
62
- }
63
-
64
- @keyframes slideDown {
65
- from {
66
- opacity: 0;
67
- transform: translateY(-50px);
68
- }
69
-
70
- to {
71
- opacity: 1;
72
- transform: translateY(0);
73
- }
74
- }
75
-
76
- h1 {
77
- font-size: clamp(2rem, 5vw, 4rem);
78
- font-weight: 900;
79
- background: linear-gradient(135deg, var(--primary) 0%, var(--accent) 50%, var(--secondary) 100%);
80
- -webkit-background-clip: text;
81
- -webkit-text-fill-color: transparent;
82
- background-clip: text;
83
- text-transform: uppercase;
84
- letter-spacing: -2px;
85
- margin-bottom: 1rem;
86
- position: relative;
87
- display: inline-block;
88
- }
89
-
90
- h1::after {
91
- content: '';
92
- position: absolute;
93
- bottom: -10px;
94
- left: 50%;
95
- transform: translateX(-50%);
96
- width: 60%;
97
- height: 6px;
98
- background: linear-gradient(90deg, transparent, var(--primary), transparent);
99
- animation: glow 2s ease-in-out infinite;
100
- }
101
-
102
- @keyframes glow {
103
-
104
- 0%,
105
- 100% {
106
- opacity: 0.5;
107
- }
108
-
109
- 50% {
110
- opacity: 1;
111
- }
112
  }
113
 
114
- .subtitle {
115
- font-size: 1.2rem;
116
- color: var(--accent);
117
- letter-spacing: 2px;
118
  }
119
 
120
- .upload-section {
121
- background: var(--surface);
122
- border: var(--border) solid var(--primary);
123
- box-shadow: 8px 8px 0 var(--primary);
124
- padding: 2rem;
125
- margin-bottom: 3rem;
126
- position: relative;
127
- transition: all 0.3s ease;
128
- animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.2s both;
129
  }
130
 
131
- @keyframes slideUp {
132
- from {
133
- opacity: 0;
134
- transform: translateY(50px);
135
- }
136
-
137
- to {
138
- opacity: 1;
139
- transform: translateY(0);
140
- }
141
- }
142
-
143
- .upload-section:hover {
144
- transform: translate(-2px, -2px);
145
- box-shadow: 12px 12px 0 var(--primary);
146
  }
147
 
148
- .upload-zone {
149
- border: 3px dashed var(--accent);
150
- padding: 3rem;
151
- text-align: center;
152
- cursor: pointer;
153
- transition: all 0.3s ease;
154
- background: rgba(0, 212, 255, 0.05);
155
  }
156
 
157
- .upload-zone:hover {
158
- background: rgba(0, 212, 255, 0.1);
159
- border-color: var(--primary);
 
160
  }
161
 
162
- .upload-zone.dragging {
163
- background: rgba(0, 255, 136, 0.2);
164
- border-color: var(--primary);
165
- transform: scale(1.02);
166
  }
167
 
168
- input[type="file"] {
169
- display: none;
 
 
170
  }
171
 
172
- .btn {
173
- background: var(--primary);
174
- color: var(--bg);
175
- border: var(--border) solid var(--bg);
176
- padding: 1rem 2rem;
177
- font-size: 1.1rem;
178
- font-weight: 900;
179
- text-transform: uppercase;
180
- cursor: pointer;
181
  transition: all 0.2s ease;
182
- box-shadow: 4px 4px 0 var(--bg);
183
- letter-spacing: 1px;
184
- position: relative;
185
- }
186
-
187
- .btn:hover:not(:disabled) {
188
- transform: translate(-2px, -2px);
189
- box-shadow: 6px 6px 0 var(--bg);
190
- }
191
-
192
- .btn:active:not(:disabled) {
193
- transform: translate(2px, 2px);
194
- box-shadow: 2px 2px 0 var(--bg);
195
- }
196
-
197
- .btn:disabled {
198
- opacity: 0.6;
199
- cursor: not-allowed;
200
- }
201
-
202
- .btn-secondary {
203
- background: var(--accent);
204
- }
205
-
206
- .btn-small {
207
- padding: 0.5rem 1rem;
208
- font-size: 0.85rem;
209
- box-shadow: 3px 3px 0 var(--bg);
210
- }
211
-
212
- .btn-small:hover:not(:disabled) {
213
- box-shadow: 4px 4px 0 var(--bg);
214
- }
215
-
216
- .table-section {
217
- animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.4s both;
218
- }
219
-
220
- .table-wrapper {
221
- overflow-x: auto;
222
- background: var(--surface);
223
- border: var(--border) solid var(--secondary);
224
- box-shadow: 8px 8px 0 var(--secondary);
225
- }
226
-
227
- table {
228
- width: 100%;
229
- border-collapse: collapse;
230
- }
231
-
232
- thead {
233
- background: linear-gradient(135deg, var(--primary), var(--accent));
234
- }
235
-
236
- th {
237
- padding: 1.5rem 1rem;
238
- text-align: left;
239
- font-weight: 900;
240
- text-transform: uppercase;
241
- letter-spacing: 1px;
242
- color: var(--bg);
243
- border-right: 3px solid var(--bg);
244
  }
245
 
246
- th:last-child {
247
- border-right: none;
 
248
  }
249
 
250
- tbody tr {
251
- border-bottom: 2px solid rgba(0, 212, 255, 0.2);
252
- transition: all 0.3s ease;
253
- animation: fadeIn 0.5s ease;
254
  }
255
 
256
- @keyframes fadeIn {
257
  from {
258
- opacity: 0;
259
  }
260
 
261
  to {
262
- opacity: 1;
263
  }
264
  }
265
 
266
- tbody tr:hover {
267
- background: rgba(0, 255, 136, 0.1);
268
- }
269
-
270
- td {
271
- padding: 1.5rem 1rem;
272
- color: var(--text);
273
- }
274
-
275
- .status {
276
- display: inline-block;
277
- padding: 0.5rem 1rem;
278
- border: 3px solid;
279
- font-weight: 900;
280
- text-transform: uppercase;
281
- font-size: 0.85rem;
282
- letter-spacing: 1px;
283
- }
284
-
285
- .status-not_started {
286
- background: var(--bg);
287
- border-color: var(--accent);
288
- color: var(--accent);
289
- }
290
-
291
- .status-processing {
292
- background: var(--bg);
293
- border-color: var(--primary);
294
- color: var(--primary);
295
- animation: pulse 1.5s ease-in-out infinite;
296
- }
297
-
298
- @keyframes pulse {
299
-
300
- 0%,
301
- 100% {
302
- opacity: 1;
303
  }
304
 
305
  50% {
306
- opacity: 0.6;
307
  }
308
- }
309
-
310
- .status-completed {
311
- background: var(--primary);
312
- border-color: var(--primary);
313
- color: var(--bg);
314
- }
315
-
316
- .status-failed {
317
- background: var(--error);
318
- border-color: var(--error);
319
- color: var(--text);
320
- }
321
 
322
- .caption-cell {
323
- max-width: 200px;
324
- overflow: hidden;
325
- text-overflow: ellipsis;
326
- white-space: nowrap;
327
- }
328
-
329
- .empty-state {
330
- text-align: center;
331
- padding: 4rem 2rem;
332
- color: var(--accent);
333
- font-size: 1.2rem;
334
  }
335
 
336
- .refresh-btn {
337
- position: fixed;
338
- bottom: 2rem;
339
- right: 2rem;
340
- width: 60px;
341
- height: 60px;
342
- border-radius: 50%;
343
- background: var(--secondary);
344
- border: var(--border) solid var(--bg);
345
- box-shadow: 4px 4px 0 var(--bg);
346
- cursor: pointer;
347
- transition: all 0.3s ease;
348
- display: flex;
349
- align-items: center;
350
- justify-content: center;
351
- font-size: 1.5rem;
352
- z-index: 1000;
353
  }
354
 
355
- .refresh-btn:hover {
356
- transform: rotate(180deg) scale(1.1);
357
- box-shadow: 6px 6px 0 var(--bg);
358
  }
359
 
360
- /* Loader styles */
361
- .loader-overlay {
362
- position: fixed;
363
- top: 0;
364
- left: 0;
365
- width: 100%;
366
- height: 100%;
367
- background: rgba(10, 14, 39, 0.95);
368
- display: flex;
369
- align-items: center;
370
- justify-content: center;
371
- z-index: 9999;
372
- animation: fadeIn 0.3s ease;
373
  }
374
 
375
- .loader {
376
- width: 80px;
377
- height: 80px;
378
- border: 6px solid var(--surface);
379
- border-top: 6px solid var(--primary);
380
- border-right: 6px solid var(--accent);
381
- border-bottom: 6px solid var(--secondary);
382
- border-radius: 50%;
383
- animation: spin 1s linear infinite;
384
  }
385
 
386
- @keyframes spin {
387
- 0% {
388
- transform: rotate(0deg);
389
- }
390
-
391
- 100% {
392
- transform: rotate(360deg);
393
- }
394
  }
395
 
396
- .loader-text {
397
- position: absolute;
398
- margin-top: 120px;
399
- font-size: 1.2rem;
400
- font-weight: 900;
401
- color: var(--primary);
402
- text-transform: uppercase;
403
- letter-spacing: 2px;
404
  }
405
 
406
- /* Modal styles */
407
  .modal {
408
- display: none;
409
  position: fixed;
410
- top: 0;
411
- left: 0;
412
- width: 100%;
413
- height: 100%;
414
- background: rgba(10, 14, 39, 0.95);
415
- z-index: 2000;
416
- animation: fadeIn 0.3s ease;
417
- overflow-y: auto;
418
  }
419
 
420
  .modal.active {
421
  display: flex;
422
- align-items: center;
423
- justify-content: center;
424
- padding: 2rem;
425
  }
426
 
427
  .modal-content {
428
- background: var(--surface);
429
- border: var(--border) solid var(--primary);
430
- box-shadow: 12px 12px 0 var(--primary);
431
- max-width: 800px;
432
  width: 100%;
433
- max-height: 80vh;
434
- position: relative;
435
- animation: modalSlideIn 0.4s cubic-bezier(0.68, -0.55, 0.265, 1.55);
436
  display: flex;
437
  flex-direction: column;
 
 
 
438
  }
439
 
440
- @keyframes modalSlideIn {
441
- from {
442
- opacity: 0;
443
- transform: translateY(-50px) scale(0.9);
444
- }
445
-
446
- to {
447
- opacity: 1;
448
- transform: translateY(0) scale(1);
449
- }
450
  }
451
 
452
  .modal-header {
 
 
453
  display: flex;
454
  justify-content: space-between;
455
  align-items: center;
456
- padding: 2rem 2rem 1rem 2rem;
457
- border-bottom: 3px solid var(--primary);
458
- background: var(--surface);
459
- position: sticky;
460
- top: 0;
461
  z-index: 10;
462
  }
463
 
464
- .modal-title {
465
- font-size: 1.5rem;
466
- font-weight: 900;
467
- color: var(--primary);
468
- text-transform: uppercase;
469
  }
470
 
471
- .modal-close {
472
- background: var(--error);
473
- color: var(--text);
474
- border: 3px solid var(--bg);
475
- width: 40px;
476
- height: 40px;
477
- border-radius: 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  cursor: pointer;
479
- font-size: 1.5rem;
480
- font-weight: 900;
481
- transition: all 0.2s ease;
482
- box-shadow: 3px 3px 0 var(--bg);
483
  }
484
 
485
- .modal-close:hover {
486
- transform: translate(-2px, -2px);
487
- box-shadow: 5px 5px 0 var(--bg);
488
- }
489
-
490
- .code-block {
491
- background: var(--bg);
492
- border: 3px solid var(--accent);
493
- padding: 1.5rem 1.5rem 1.5rem 1.5rem;
494
- border-radius: 0;
495
- overflow-y: auto;
496
- margin: 1.5rem 2rem 2rem 2rem;
497
- position: relative;
498
- flex: 1;
499
- }
500
-
501
- .code-block code {
502
- font-family: 'Courier New', monospace;
503
- color: var(--primary);
504
- font-size: 0.95rem;
505
- line-height: 1.6;
506
- white-space: pre-wrap;
507
- word-break: break-all;
508
  }
509
 
510
  .copy-btn {
511
- position: sticky;
512
- top: 0.5rem;
513
- float: right;
514
- background: var(--accent);
515
- color: var(--bg);
516
- border: 3px solid var(--bg);
517
- padding: 0.5rem 1rem;
518
- font-size: 0.8rem;
519
- font-weight: 900;
520
- cursor: pointer;
521
- transition: all 0.2s ease;
522
- box-shadow: 3px 3px 0 var(--bg);
523
- z-index: 5;
524
  }
525
 
526
  .copy-btn:hover {
527
  transform: translate(-2px, -2px);
528
- box-shadow: 4px 4px 0 var(--bg);
529
  }
530
 
531
- .copy-btn.copied {
532
- background: var(--primary);
 
 
 
533
  }
534
 
535
- @media (max-width: 768px) {
536
- .container {
537
- padding: 1rem;
538
- }
539
-
540
- .upload-section,
541
- .table-wrapper {
542
- box-shadow: 4px 4px 0 var(--primary);
543
- }
544
-
545
- th,
546
- td {
547
- padding: 1rem 0.5rem;
548
- font-size: 0.9rem;
549
- }
550
-
551
- .caption-cell {
552
- max-width: 100px;
553
- }
554
 
555
- .modal-content {
556
- padding: 0;
557
- }
 
 
 
 
 
 
 
 
 
 
558
 
559
- .modal-header {
560
- padding: 1.5rem 1.5rem 1rem 1.5rem;
561
- }
 
 
 
 
 
562
 
563
- .code-block {
564
- margin: 1.5rem 1.5rem 1.5rem 1.5rem;
565
- }
 
 
 
566
  }
567
 
568
- .notification {
569
- position: fixed;
570
- top: 2rem;
571
- right: 2rem;
572
- padding: 1.5rem 2rem;
573
- background: var(--primary);
574
- color: var(--bg);
575
- border: var(--border) solid var(--bg);
576
- box-shadow: 6px 6px 0 var(--bg);
577
- font-weight: 900;
578
- z-index: 2000;
579
- animation: slideInRight 0.5s ease, slideOutRight 0.5s ease 3.5s;
580
  }
581
 
582
- @keyframes slideInRight {
583
- from {
584
- transform: translateX(400px);
585
- opacity: 0;
586
- }
587
 
588
- to {
589
- transform: translateX(0);
590
- opacity: 1;
591
- }
592
  }
593
 
594
- @keyframes slideOutRight {
595
- to {
596
- transform: translateX(400px);
597
- opacity: 0;
598
- }
599
  }
600
  </style>
601
  </head>
602
 
603
- <body>
604
- <div class="container">
605
- <header>
606
- <h1>Audio Caption Generator</h1>
607
- <p class="subtitle">Transcribe • Process • Analyze</p>
608
- </header>
609
-
610
- <div class="upload-section">
611
- <h2 style="margin-bottom: 1.5rem; color: var(--primary);">Upload Audio File</h2>
612
- <div class="upload-zone" id="uploadZone">
613
- <p style="font-size: 1.2rem; margin-bottom: 1rem;">📁 Drop audio file here or click to browse</p>
614
- <p style="color: var(--accent); font-size: 0.9rem;">Supported: WAV, MP3, FLAC, OGG, M4A, AAC</p>
615
- <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.ogg,.m4a,.aac">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  </div>
617
- <button class="btn" id="uploadBtn" style="margin-top: 1.5rem; width: 100%;">
618
- 🚀 Upload & Process
619
- </button>
620
  </div>
621
 
622
- <div class="table-section">
623
- <h2 style="margin-bottom: 1.5rem; color: var(--secondary);">Processing Queue</h2>
624
- <div class="table-wrapper">
625
- <table>
626
- <thead>
627
- <tr>
628
- <th>Filename</th>
629
- <th>Status</th>
630
- <th>Est. Wait</th>
631
- <th>Caption</th>
632
- <th>Created</th>
633
- <th>Processed</th>
634
- </tr>
635
- </thead>
636
- <tbody id="filesTable">
637
- <tr>
638
- <td colspan="6" class="empty-state">No files uploaded yet. Start by uploading an audio file!
639
- </td>
640
- </tr>
641
- </tbody>
642
- </table>
643
  </div>
644
  </div>
645
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
 
647
- <button class="refresh-btn" id="refreshBtn" title="Refresh">🔄</button>
 
 
 
 
 
 
 
648
 
649
- <!-- Loader -->
650
- <div class="loader-overlay" id="loader" style="display: none;">
651
- <div>
652
- <div class="loader"></div>
653
- <div class="loader-text">Uploading...</div>
654
- </div>
655
- </div>
656
 
657
- <!-- Modal -->
658
- <div class="modal" id="captionModal">
 
659
  <div class="modal-content">
 
 
 
 
 
 
 
 
 
 
 
 
660
  <div class="modal-header">
661
- <div class="modal-title">📄 Caption Details</div>
662
- <button class="modal-close" onclick="closeModal()">×</button>
 
 
 
663
  </div>
664
- <div class="code-block">
665
- <button class="copy-btn" onclick="copyCaption()">📋 Copy</button>
666
- <code id="captionCode"></code>
667
  </div>
668
  </div>
669
  </div>
670
 
671
- <script>
672
- const API_URL = '/api';
673
- let selectedFile = null;
674
-
675
- // Store captions by file ID to avoid inline JSON issues with large texts
676
- const captionStore = new Map();
677
-
678
- // Upload zone interactions
679
- const uploadZone = document.getElementById('uploadZone');
680
- const fileInput = document.getElementById('audioFile');
681
- const loader = document.getElementById('loader');
682
- const uploadBtn = document.getElementById('uploadBtn');
 
 
 
 
 
 
 
 
 
 
683
 
684
- uploadZone.addEventListener('click', () => fileInput.click());
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
 
686
- uploadZone.addEventListener('dragover', (e) => {
687
- e.preventDefault();
688
- uploadZone.classList.add('dragging');
689
- });
690
 
691
- uploadZone.addEventListener('dragleave', () => {
692
- uploadZone.classList.remove('dragging');
693
- });
 
 
 
 
 
 
694
 
695
- uploadZone.addEventListener('drop', (e) => {
696
- e.preventDefault();
697
- uploadZone.classList.remove('dragging');
698
- const file = e.dataTransfer.files[0];
699
- if (file) {
700
- fileInput.files = e.dataTransfer.files;
701
- selectedFile = file;
702
- showNotification(`Selected: ${file.name}`);
703
- }
704
- });
705
-
706
- fileInput.addEventListener('change', (e) => {
707
- selectedFile = e.target.files[0];
708
- if (selectedFile) {
709
- showNotification(`Selected: ${selectedFile.name}`);
710
- // Auto-upload if triggered by upload button click
711
- if (pendingUpload) {
712
- pendingUpload = false;
713
- uploadFile();
714
- }
715
  }
716
- });
717
 
718
- // Flag to track if upload was triggered without file selection
719
- let pendingUpload = false;
720
 
721
- // Upload file function
722
- async function uploadFile() {
723
- if (!selectedFile) {
724
- return;
725
- }
726
 
727
  const formData = new FormData();
728
- formData.append('audio', selectedFile);
729
-
730
- // Show loader
731
- loader.style.display = 'flex';
732
- uploadBtn.disabled = true;
733
 
734
  try {
735
- const response = await fetch(`${API_URL}/upload`, {
736
- method: 'POST',
737
- body: formData
738
- });
739
-
740
- const data = await response.json();
741
-
742
- if (response.ok) {
743
- showNotification('File uploaded successfully! 🎉');
744
- selectedFile = null;
745
- fileInput.value = '';
746
- loadFiles();
747
  } else {
748
- showNotification(data.error || 'Upload failed', 'error');
 
749
  }
750
- } catch (error) {
751
- showNotification('Network error: ' + error.message, 'error');
752
- } finally {
753
- // Hide loader
754
- loader.style.display = 'none';
755
- uploadBtn.disabled = false;
756
  }
757
  }
758
 
759
- // Upload button - one-click flow
760
- uploadBtn.addEventListener('click', async () => {
761
- if (!selectedFile) {
762
- // No file selected - trigger file picker and auto-upload after selection
763
- pendingUpload = true;
764
- fileInput.click();
765
- return;
766
- }
767
-
768
- // File already selected - upload directly
769
- uploadFile();
770
- });
771
-
772
- // Load files
773
- async function loadFiles() {
774
  try {
775
- const response = await fetch(`${API_URL}/files`);
776
- const files = await response.json();
777
-
778
- const tbody = document.getElementById('filesTable');
779
 
780
- // Clear old captions and store new ones
781
- captionStore.clear();
782
 
783
- if (files.length === 0) {
784
- tbody.innerHTML = '<tr><td colspan="6" class="empty-state">No files uploaded yet. Start by uploading an audio file!</td></tr>';
785
- return;
786
- }
787
-
788
- tbody.innerHTML = files.map(file => {
789
- // Store caption in Map to avoid inline JSON issues with large texts
790
- if (file.caption) {
791
- captionStore.set(file.id, file.caption);
792
- }
793
 
794
- const captionPreview = file.caption ?
795
- (file.caption.length > 50 ? file.caption.substring(0, 50) + '...' : file.caption) :
796
- '—';
797
-
798
- // Format estimated wait time
799
- let estWait = '—';
800
- if (file.status === 'not_started' && file.estimated_start_seconds !== null) {
801
- const seconds = file.estimated_start_seconds;
802
- if (seconds < 60) {
803
- estWait = `${seconds}s`;
804
- } else if (seconds < 3600) {
805
- const mins = Math.floor(seconds / 60);
806
- const secs = seconds % 60;
807
- estWait = secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
808
- } else {
809
- const hours = Math.floor(seconds / 3600);
810
- const mins = Math.floor((seconds % 3600) / 60);
811
- estWait = mins > 0 ? `${hours}h ${mins}m` : `${hours}h`;
812
- }
813
- // Add queue position
814
- if (file.queue_position) {
815
- estWait = `#${file.queue_position} (${estWait})`;
816
- }
817
- } else if (file.status === 'processing') {
818
- estWait = '⏳ Processing...';
819
- }
820
-
821
- return `
822
- <tr>
823
- <td><strong>${file.filename}</strong></td>
824
- <td><span class="status status-${file.status}">${file.status.replace('_', ' ')}</span></td>
825
- <td>${estWait}</td>
826
- <td class="caption-cell">
827
- ${file.caption ?
828
- `<button class="btn btn-small btn-secondary" onclick="showCaption('${file.id}')" style="margin-left: 0.5rem;">Show</button>`
829
- : '—'}
830
- </td>
831
- <td>${new Date(file.created_at).toLocaleString()}</td>
832
- <td>${file.processed_at ? new Date(file.processed_at).toLocaleString() : '—'}</td>
833
- </tr>
834
- `;
835
- }).join('');
836
- } catch (error) {
837
- console.error('Error loading files:', error);
838
  }
839
  }
840
 
841
- // Modal functions
842
- function showCaption(fileId) {
843
- const caption = captionStore.get(fileId);
844
- if (!caption) {
845
- console.error('Caption not found for file:', fileId);
846
- return;
847
- }
848
-
849
- const modal = document.getElementById('captionModal');
850
- const codeBlock = document.getElementById('captionCode');
851
-
852
- // Parse JSON if it's a string
853
- let formattedCaption = caption;
854
  try {
855
- const parsed = JSON.parse(caption);
856
- formattedCaption = JSON.stringify(parsed, null, 2);
 
 
 
 
 
857
  } catch (e) {
858
- // If not JSON, use as is
859
- formattedCaption = caption;
 
860
  }
861
-
862
- codeBlock.textContent = formattedCaption;
863
- modal.classList.add('active');
864
- }
865
-
866
- function closeModal() {
867
- const modal = document.getElementById('captionModal');
868
- modal.classList.remove('active');
869
  }
870
 
871
- function copyCaption() {
872
- const codeBlock = document.getElementById('captionCode');
873
- const copyBtn = event.target;
874
-
875
- navigator.clipboard.writeText(codeBlock.textContent).then(() => {
876
- const originalText = copyBtn.textContent;
877
- copyBtn.textContent = '✓ Copied!';
878
- copyBtn.classList.add('copied');
879
-
880
- setTimeout(() => {
881
- copyBtn.textContent = originalText;
882
- copyBtn.classList.remove('copied');
883
- }, 2000);
884
- });
885
- }
886
-
887
- // Close modal on background click
888
- document.getElementById('captionModal').addEventListener('click', (e) => {
889
- if (e.target.id === 'captionModal') {
890
- closeModal();
891
- }
892
- });
893
-
894
- // Close modal on ESC key
895
- document.addEventListener('keydown', (e) => {
896
- if (e.key === 'Escape') {
897
- closeModal();
898
- }
899
- });
900
-
901
- // Refresh button
902
- document.getElementById('refreshBtn').addEventListener('click', loadFiles);
903
-
904
- // Auto-refresh every 10 minutes
905
- setInterval(loadFiles, 1000 * 60 * 10);
906
-
907
- // Show notification
908
- function showNotification(message, type = 'success') {
909
- const notification = document.createElement('div');
910
- notification.className = 'notification';
911
- notification.textContent = message;
912
-
913
- if (type === 'error') {
914
- notification.style.background = 'var(--error)';
915
  }
916
 
917
- document.body.appendChild(notification);
918
-
919
- setTimeout(() => {
920
- notification.remove();
921
- }, 4000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  }
923
 
924
- // Initial load
925
- loadFiles();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
926
  </script>
927
  </body>
928
 
 
2
  <html lang="en">
3
 
4
  <head>
5
+ <meta charset="utf-8" />
6
+ <meta content="width=device-width, initial-scale=1.0" name="viewport" />
7
+ <title>STT - Speech to Text</title>
8
+
9
+ <!-- External Assets -->
10
+ <script src="https://cdn.tailwindcss.com?plugins=forms,container-queries"></script>
11
+ <link href="https://fonts.googleapis.com/css2?family=Fredoka:wght@300..700&family=Caveat:wght@400..700&display=swap"
12
+ rel="stylesheet" />
13
+ <link
14
+ href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap"
15
+ rel="stylesheet" />
16
+
17
+ <!-- Tailwind Configuration -->
18
+ <script id="tailwind-config">
19
+ tailwind.config = {
20
+ darkMode: "class",
21
+ theme: {
22
+ extend: {
23
+ colors: {
24
+ surface: "#e6f0fd",
25
+ "crayon-blue": "#2563eb",
26
+ "crayon-red": "#dc2626",
27
+ "crayon-green": "#16a34a",
28
+ "crayon-yellow": "#ca8a04",
29
+ "crayon-orange": "#ea580c",
30
+ "crayon-purple": "#7c3aed",
31
+ "crayon-dark": "#1A1A1A"
32
+ },
33
+ fontFamily: {
34
+ "fredoka": ["Fredoka", "sans-serif"],
35
+ "caveat": ["Caveat", "cursive"]
36
+ },
37
+ fontSize: {
38
+ "headline-lg": ["42px", { lineHeight: "1.1", fontWeight: "700" }],
39
+ "headline-md": ["28px", { lineHeight: "1.2", fontWeight: "600" }],
40
+ "body-lg": ["24px", { lineHeight: "1.5", fontWeight: "500" }],
41
+ "label-sm": ["18px", { lineHeight: "1.2", letterSpacing: "0.01em", fontWeight: "500" }],
42
+ "body-md": ["20px", { lineHeight: "1.5", fontWeight: "400" }]
43
+ }
44
+ },
45
+ },
46
  }
47
+ </script>
48
 
49
+ <svg height="0" style="position: absolute;" width="0">
50
+ <filter height="120%" id="crayon-texture" width="120%" x="-10%" y="-10%">
51
+ <feTurbulence baseFrequency="0.4" numOctaves="3" result="noise" type="fractalNoise"></feTurbulence>
52
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="2.5" xChannelSelector="R" yChannelSelector="G">
53
+ </feDisplacementMap>
54
+ </filter>
55
+ <filter height="120%" id="crayon-heavy" width="120%" x="-10%" y="-10%">
56
+ <feTurbulence baseFrequency="0.5" numOctaves="4" result="noise" type="fractalNoise"></feTurbulence>
57
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="4" xChannelSelector="R" yChannelSelector="G">
58
+ </feDisplacementMap>
59
+ </filter>
60
+ </svg>
61
 
62
+ <style>
63
+ /* Sketchbook Styles */
64
  body {
65
+ background-color: #e6f0fd;
66
+ background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='noiseFilter'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.8' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23noiseFilter)' opacity='0.08'/%3E%3C/svg%3E");
67
+ color: #1A1A1A;
68
+ font-family: 'Fredoka', sans-serif;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
70
 
71
+ .bg-surface {
72
+ background-color: rgb(255 255 255 / 0%) !important;
73
+ backdrop-filter: blur(2px);
 
74
  }
75
 
76
+ .crayon-filter {
77
+ filter: url('#crayon-texture');
 
 
 
 
 
 
 
78
  }
79
 
80
+ .crayon-heavy {
81
+ filter: url('#crayon-heavy');
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
 
84
+ .crayon-border-green {
85
+ border: 4px solid #16a34a;
86
+ border-radius: 12px 8px 15px 10px / 8px 14px 10px 12px;
87
+ filter: url('#crayon-texture');
 
 
 
88
  }
89
 
90
+ .task-card {
91
+ border: 3px solid rgba(124, 58, 237, 0.4);
92
+ border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px;
93
+ filter: url('#crayon-texture');
94
  }
95
 
96
+ .crayon-border-blue {
97
+ border: 4px dashed #2563eb;
98
+ border-radius: 15px 10px 12px 18px / 12px 18px 15px 10px;
99
+ filter: url('#crayon-texture');
100
  }
101
 
102
+ .crayon-border-purple {
103
+ border: 4px solid #7c3aed;
104
+ border-radius: 10px 16px 12px 14px / 16px 12px 14px 10px;
105
+ filter: url('#crayon-texture');
106
  }
107
 
108
+ .crayon-button {
109
+ border: 4px solid #2563eb;
110
+ border-radius: 12px 8px 14px 10px / 8px 14px 10px 12px;
 
 
 
 
 
 
111
  transition: all 0.2s ease;
112
+ filter: url('#crayon-texture');
113
+ cursor: pointer;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
 
116
+ .crayon-button:hover {
117
+ transform: scale(1.05) rotate(1deg);
118
+ box-shadow: 6px 6px 0px 0px rgba(0, 0, 0, 0.1);
119
  }
120
 
121
+ .crayon-button:hover .material-symbols-outlined.spin-on-hover {
122
+ animation: spin 2s linear infinite;
 
 
123
  }
124
 
125
+ @keyframes spin {
126
  from {
127
+ transform: rotate(0deg);
128
  }
129
 
130
  to {
131
+ transform: rotate(360deg);
132
  }
133
  }
134
 
135
+ @keyframes drift {
136
+ 0% {
137
+ transform: translateX(0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
 
140
  50% {
141
+ transform: translateX(20px);
142
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
+ 100% {
145
+ transform: translateX(0);
146
+ }
 
 
 
 
 
 
 
 
 
147
  }
148
 
149
+ .drift-slow {
150
+ animation: drift 8s ease-in-out infinite;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
 
153
+ .drift-medium {
154
+ animation: drift 5s ease-in-out infinite;
 
155
  }
156
 
157
+ .organic-shape {
158
+ border-radius: 255px 15px 225px 15px/15px 225px 15px 255px;
159
+ filter: url('#crayon-texture');
 
 
 
 
 
 
 
 
 
 
160
  }
161
 
162
+ .scribble-fill-green {
163
+ background: repeating-linear-gradient(60deg, #16a34a, #16a34a 2px, #15803d 3px, #16a34a 4px);
 
 
 
 
 
 
 
164
  }
165
 
166
+ .progress-fill {
167
+ background: repeating-linear-gradient(80deg, #2563eb, #2563eb 2px, #1d4ed8 3px, #2563eb 5px);
 
 
 
 
 
 
168
  }
169
 
170
+ .material-symbols-outlined {
171
+ font-variation-settings: 'FILL' 1, 'wght' 700, 'GRAD' 0, 'opsz' 48;
172
+ filter: url('#crayon-texture');
 
 
 
 
 
173
  }
174
 
175
+ /* --- Modals --- */
176
  .modal {
 
177
  position: fixed;
178
+ inset: 0;
179
+ background: rgba(15, 23, 42, 0.6);
180
+ display: none;
181
+ align-items: center;
182
+ justify-content: center;
183
+ z-index: 100;
184
+ padding: 2rem;
 
185
  }
186
 
187
  .modal.active {
188
  display: flex;
 
 
 
189
  }
190
 
191
  .modal-content {
192
+ border: 4px solid #2563eb;
 
 
 
193
  width: 100%;
194
+ max-width: 900px;
195
+ border-radius: 24px;
 
196
  display: flex;
197
  flex-direction: column;
198
+ max-height: 85vh;
199
+ box-shadow: 12px 12px 0px 0px rgba(0, 0, 0, 0.1);
200
+ position: relative;
201
  }
202
 
203
+ .modal-sketch-bg {
204
+ position: absolute;
205
+ inset: -8px;
206
+ border: 6px solid #2563eb;
207
+ backdrop-filter: blur(10px);
208
+ border-radius: 255px 15px 225px 15px/15px 225px 15px 255px;
209
+ z-index: -1;
210
+ filter: url('#crayon-texture');
211
+ pointer-events: none;
 
212
  }
213
 
214
  .modal-header {
215
+ padding: 1.5rem 2rem;
216
+ border-bottom: 3px dashed #adc6ff;
217
  display: flex;
218
  justify-content: space-between;
219
  align-items: center;
220
+ position: relative;
 
 
 
 
221
  z-index: 10;
222
  }
223
 
224
+ .modal-body {
225
+ padding: 2rem;
226
+ overflow-y: auto;
227
+ position: relative;
228
+ z-index: 10;
229
  }
230
 
231
+ #resultText,
232
+ pre {
233
+ border: 3px dashed #adc6ff !important;
234
+ padding: 2rem !important;
235
+ border-radius: 20px !important;
236
+ font-family: 'Fredoka', sans-serif !important;
237
+ font-size: 1.5rem !important;
238
+ font-weight: 600 !important;
239
+ color: #fff !important;
240
+ white-space: pre-wrap !important;
241
+ word-break: break-all !important;
242
+ line-height: 1.6 !important;
243
+ filter: url('#crayon-texture');
244
+ }
245
+
246
+ .close-modal {
247
+ background: transparent;
248
+ border: none;
249
+ color: #dc2626;
250
+ font-size: 3rem;
251
  cursor: pointer;
252
+ font-weight: 700;
 
 
 
253
  }
254
 
255
+ .text-headline-lg {
256
+ filter: url('#crayon-texture');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  }
258
 
259
  .copy-btn {
260
+ background: #16a34a;
261
+ color: white;
262
+ padding: 0.5rem 1.5rem;
263
+ border-radius: 12px;
264
+ font-weight: 700;
265
+ box-shadow: 4px 4px 0px 0px #15803d;
266
+ transition: all 0.2s;
267
+ filter: url('#crayon-texture');
 
 
 
 
 
268
  }
269
 
270
  .copy-btn:hover {
271
  transform: translate(-2px, -2px);
272
+ box-shadow: 6px 6px 0px 0px #15803d;
273
  }
274
 
275
+ /* --- Specific UI Elements --- */
276
+ .status-modal-content {
277
+ max-width: 450px;
278
+ text-align: center;
279
+ padding: 3rem 2rem;
280
  }
281
 
282
+ .status-modal-bg {
283
+ position: absolute;
284
+ inset: -12px;
285
+ border: 8px solid #2563eb;
286
+ background: #e6f0fd;
287
+ border-radius: 20px 40px 15px 35px / 35px 15px 40px 20px;
288
+ z-index: -1;
289
+ filter: url('#crayon-texture');
290
+ pointer-events: none;
291
+ }
 
 
 
 
 
 
 
 
 
292
 
293
+ .status-icon-container {
294
+ width: 100px;
295
+ height: 100px;
296
+ margin: 0 auto 1.5rem;
297
+ display: flex;
298
+ align-items: center;
299
+ justify-content: center;
300
+ border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px;
301
+ border: 4px solid currentColor;
302
+ filter: url('#crayon-texture');
303
+ position: relative;
304
+ z-index: 20;
305
+ }
306
 
307
+ .status-icon-bg {
308
+ position: absolute;
309
+ inset: 0;
310
+ background: currentColor;
311
+ opacity: 0.15;
312
+ z-index: -1;
313
+ border-radius: inherit;
314
+ }
315
 
316
+ .modal-decoration {
317
+ position: absolute;
318
+ pointer-events: none;
319
+ opacity: 0.3;
320
+ z-index: 5;
321
+ filter: url('#crayon-texture');
322
  }
323
 
324
+ .table-container::-webkit-scrollbar {
325
+ width: 10px;
 
 
 
 
 
 
 
 
 
 
326
  }
327
 
328
+ .table-container::-webkit-scrollbar-track {
329
+ background: #f1f5f9;
330
+ border-radius: 10px;
331
+ }
 
332
 
333
+ .table-container::-webkit-scrollbar-thumb {
334
+ background: #cbd5e1;
335
+ border-radius: 10px;
336
+ border: 2px solid #f1f5f9;
337
  }
338
 
339
+ .dragging {
340
+ border-color: #16a34a !important;
341
+ background-color: #f0fdf4 !important;
 
 
342
  }
343
  </style>
344
  </head>
345
 
346
+ <body class="h-screen flex flex-col overflow-hidden relative">
347
+ <!-- Main Background Decorations -->
348
+ <div class="fixed inset-0 pointer-events-none overflow-hidden -z-10 opacity-40">
349
+ <!-- Stars -->
350
+ <span
351
+ class="material-symbols-outlined absolute text-5xl text-crayon-yellow top-20 left-[10%] rotate-12 crayon-heavy animate-pulse">star</span>
352
+ <span
353
+ class="material-symbols-outlined absolute text-3xl text-crayon-orange top-[40%] left-[5%] -rotate-12 crayon-filter">star</span>
354
+ <span
355
+ class="material-symbols-outlined absolute text-4xl text-crayon-yellow bottom-[20%] left-[15%] rotate-45 animate-pulse">star</span>
356
+ <span
357
+ class="material-symbols-outlined absolute text-6xl text-crayon-orange top-[15%] right-[15%] rotate-[-15deg] crayon-heavy">star</span>
358
+ <span
359
+ class="material-symbols-outlined absolute text-3xl text-crayon-yellow bottom-[30%] right-[10%] rotate-12 animate-pulse">star</span>
360
+
361
+ <!-- Clouds -->
362
+ <span
363
+ class="material-symbols-outlined absolute text-[120px] text-crayon-blue top-[10%] left-[25%] opacity-20 drift-slow">cloud</span>
364
+ <span
365
+ class="material-symbols-outlined absolute text-[80px] text-crayon-purple bottom-[15%] left-[40%] opacity-10 drift-medium">cloud</span>
366
+ <span
367
+ class="material-symbols-outlined absolute text-[100px] text-crayon-blue top-[60%] right-[25%] opacity-15 drift-slow">cloud</span>
368
+ <span
369
+ class="material-symbols-outlined absolute text-[150px] text-crayon-purple top-[30%] right-[5%] opacity-10 drift-medium">cloud</span>
370
+
371
+ <!-- Hearts -->
372
+ <span
373
+ class="material-symbols-outlined absolute text-4xl text-crayon-red top-[25%] left-[18%] rotate-[-15deg] crayon-filter animate-pulse">favorite</span>
374
+ <span
375
+ class="material-symbols-outlined absolute text-2xl text-crayon-red bottom-[10%] right-[20%] rotate-12 crayon-filter">favorite</span>
376
+ <span
377
+ class="material-symbols-outlined absolute text-5xl text-crayon-red top-[70%] left-[8%] rotate-[10deg] animate-pulse">favorite</span>
378
+ </div>
379
+
380
+ <!-- SVG Filters -->
381
+ <svg height="0" width="0" style="position: absolute;">
382
+ <filter id="crayon-texture" x="-10%" y="-10%" width="120%" height="120%">
383
+ <feTurbulence type="fractalNoise" baseFrequency="0.4" numOctaves="3" result="noise" />
384
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="2.5" xChannelSelector="R" yChannelSelector="G" />
385
+ </filter>
386
+ <filter id="crayon-heavy" x="-10%" y="-10%" width="120%" height="120%">
387
+ <feTurbulence type="fractalNoise" baseFrequency="0.5" numOctaves="4" result="noise" />
388
+ <feDisplacementMap in="SourceGraphic" in2="noise" scale="4" xChannelSelector="R" yChannelSelector="G" />
389
+ </filter>
390
+ </svg>
391
+
392
+ <!-- Header -->
393
+ <header
394
+ class="bg-surface flex justify-between items-center w-[calc(100%-48px)] mx-6 mt-6 px-8 py-5 crayon-border-green z-10 shrink-0 organic-shape shadow-sm">
395
+ <div class="flex items-center gap-5">
396
+ <div
397
+ class="bg-crayon-green text-white w-14 h-14 rounded-2xl flex items-center justify-center border-[3px] border-crayon-green rotate-[-4deg] crayon-filter scribble-fill-green shadow-md">
398
+ <span class="material-symbols-outlined text-4xl">mic</span>
399
+ </div>
400
+ <div class="flex flex-col -rotate-1">
401
+ <h1 class="text-headline-lg text-[#4c1d95] leading-none mb-1">STT</h1>
402
+ <span class="text-label-sm text-[#4b5563] font-bold">Speech to Text Generation</span>
403
  </div>
 
 
 
404
  </div>
405
 
406
+ <div class="flex items-center gap-10 relative">
407
+ <div class="absolute -left-48 top-2 rotate-12 crayon-heavy">
408
+ <span class="material-symbols-outlined text-4xl text-crayon-yellow">star</span>
409
+ </div>
410
+ <div class="absolute -left-24 top-0 -rotate-6 crayon-heavy opacity-80">
411
+ <span class="material-symbols-outlined text-5xl text-crayon-blue">cloud</span>
412
+ </div>
413
+ <button id="apiDocBtn"
414
+ class="flex items-center gap-2 text-headline-md text-crayon-purple px-8 py-3 bg-surface crayon-button rotate-1 shadow-md">
415
+ <span class="material-symbols-outlined text-3xl">menu_book</span>
416
+ API DOC
417
+ </button>
418
+ <div
419
+ class="flex items-center gap-4 bg-surface px-6 py-3 rounded-full border-[4px] border-crayon-green organic-shape shadow-md">
420
+ <div id="healthDot" class="w-4 h-4 rounded-full bg-crayon-green shadow-[0_0_12px_rgba(22,163,74,0.5)]">
421
+ </div>
422
+ <span id="healthText" class="text-headline-md text-crayon-green text-2xl">Service Online</span>
423
+ <div class="text-crayon-orange flex items-center justify-center rotate-[15deg]">
424
+ <span class="material-symbols-outlined text-4xl">light_mode</span>
425
+ </div>
 
426
  </div>
427
  </div>
428
+ </header>
429
+
430
+ <!-- Main Content -->
431
+ <main class="flex-1 flex overflow-hidden p-8 gap-8 mx-auto w-full relative">
432
+ <!-- Input Section -->
433
+ <section class="w-[450px] flex flex-col gap-8">
434
+ <div id="uploadZone"
435
+ class="flex flex-col gap-6 p-8 bg-surface crayon-border-blue flex-1 relative organic-shape cursor-pointer group shadow-sm hover:shadow-md transition-shadow">
436
+ <div class="flex items-center gap-4 mb-2 rotate-1">
437
+ <div
438
+ class="bg-crayon-blue text-white rounded-full w-14 h-14 flex items-center justify-center border-2 border-crayon-blue crayon-filter shadow-md">
439
+ <span class="material-symbols-outlined text-4xl">upload</span>
440
+ </div>
441
+ <div>
442
+ <h2 class="text-headline-md text-crayon-blue leading-none mb-1 flex items-center gap-2">
443
+ INPUT
444
+ <span class="material-symbols-outlined text-crayon-red text-2xl rotate-12">favorite</span>
445
+ </h2>
446
+ <p class="text-label-sm text-[#6b7280]">Upload audio files</p>
447
+ </div>
448
+ </div>
449
+
450
+ <div
451
+ class="flex-1 flex flex-col items-center justify-center relative border-[4px] border-dashed border-[#adc6ff] rounded-[32px] bg-surface p-6 group-hover:bg-blue-50 transition-colors">
452
+ <div class="relative w-48 h-48 mb-8 flex items-center justify-center">
453
+ <div
454
+ class="absolute w-36 h-44 bg-blue-100 border-[3px] border-crayon-blue rounded-xl rotate-[12deg] right-4 bottom-4 organic-shape opacity-60">
455
+ </div>
456
+ <div
457
+ class="absolute w-36 h-44 bg-surface border-[3px] border-crayon-blue rounded-xl z-10 flex flex-col items-center p-4 organic-shape rotate-[-4deg] shadow-sm">
458
+ <div
459
+ class="w-8 h-8 rounded-full bg-crayon-yellow self-start mb-4 border-[2px] border-[#1A1A1A]">
460
+ </div>
461
+ <div class="w-full h-2 bg-[#adc6ff] rounded-full mb-3"></div>
462
+ <div class="w-2/3 h-2 bg-[#adc6ff] rounded-full self-start"></div>
463
+ </div>
464
+ <div
465
+ class="absolute -bottom-4 -right-4 bg-crayon-blue text-white rounded-full w-16 h-16 flex items-center justify-center border-[4px] border-white z-20 shadow-xl group-hover:scale-110 transition-transform rotate-12 crayon-filter">
466
+ <span class="material-symbols-outlined text-4xl">arrow_upward</span>
467
+ </div>
468
+ </div>
469
+ <p class="text-headline-md text-[#1A1A1A] mb-1 font-bold">Drag & drop audio</p>
470
+ <p class="text-label-sm text-crayon-purple mb-8 font-bold">or click to browse</p>
471
+ <div class="flex gap-2 mt-auto w-full justify-center flex-wrap">
472
+ <span
473
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-blue text-crayon-blue text-lg font-bold rounded-xl organic-shape -rotate-2">WAV</span>
474
+ <span
475
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-green text-crayon-green text-lg font-bold rounded-xl organic-shape rotate-1">MP3</span>
476
+ <span
477
+ class="px-4 py-1.5 bg-surface border-[2px] border-crayon-red text-crayon-red text-lg font-bold rounded-xl organic-shape -rotate-1">FLAC</span>
478
+ </div>
479
+ </div>
480
+ <input type="file" id="fileInput" hidden accept="audio/*">
481
+ </div>
482
+ </section>
483
+
484
+ <!-- Activity Section -->
485
+ <section
486
+ class="flex-1 flex flex-col bg-surface crayon-border-purple p-8 relative organic-shape overflow-hidden shadow-sm">
487
+ <div class="flex items-center justify-between mb-8">
488
+ <div class="flex items-center gap-4 rotate-1">
489
+ <div
490
+ class="bg-crayon-purple text-white rounded-full w-14 h-14 flex items-center justify-center border-[3px] border-crayon-purple shadow-md">
491
+ <span class="material-symbols-outlined text-4xl">schedule</span>
492
+ </div>
493
+ <div>
494
+ <h2 class="text-headline-lg text-[#4c1d95] leading-none mb-1">ACTIVITY</h2>
495
+ <p class="text-label-sm text-[#6b7280] font-bold">Your recently processed audio</p>
496
+ </div>
497
+ </div>
498
+ <div class="relative w-40 h-20 flex items-center">
499
+ <svg class="absolute left-[-30px] top-4 w-32 h-16 crayon-filter text-crayon-yellow" fill="none"
500
+ stroke="currentColor" stroke-dasharray="6 6" stroke-linecap="round" stroke-width="3"
501
+ viewBox="0 0 100 50">
502
+ <path d="M10,40 Q40,50 60,30 T90,10" />
503
+ </svg>
504
+ <span
505
+ class="material-symbols-outlined text-5xl text-crayon-orange absolute right-0 top-0 rotate-12">send</span>
506
+ </div>
507
+ <button onclick="loadTasks()"
508
+ class="flex items-center gap-2 text-headline-md text-crayon-blue px-8 py-3 bg-surface crayon-button border-[4px] -rotate-1 shadow-md">
509
+ <span class="material-symbols-outlined text-3xl spin-on-hover">sync</span>
510
+ Refresh
511
+ </button>
512
+ </div>
513
 
514
+ <!-- List Header -->
515
+ <div class="flex w-full px-6 pb-4 border-b-[5px] text-[#4c1d95] font-bold text-xl uppercase tracking-wider"
516
+ style="border-color: rgba(124, 58, 237, 0.4); border-radius: 20px 15px 25px 18px / 18px 25px 15px 20px; filter: url(#crayon-texture);">
517
+ <div class="w-1/2">FILE NAME</div>
518
+ <div class="w-1/6 text-center">STATUS</div>
519
+ <div class="w-1/4 text-center">PROGRESS</div>
520
+ <div class="w-[10%] text-center">ACTION</div>
521
+ </div>
522
 
523
+ <!-- Task List Body -->
524
+ <div id="queueBody" class="flex flex-col gap-5 mt-6 overflow-y-auto flex-1 table-container pr-4">
525
+ <div class="text-center py-32 text-headline-md text-[#94a3b8] font-bold opacity-60">No tasks found
526
+ yet...</div>
527
+ </div>
528
+ </section>
529
+ </main>
530
 
531
+ <!-- Modals -->
532
+ <!-- Result & API Modal -->
533
+ <div id="resultModal" class="modal">
534
  <div class="modal-content">
535
+ <div class="modal-sketch-bg"></div>
536
+
537
+ <!-- Decorations -->
538
+ <span
539
+ class="material-symbols-outlined modal-decoration text-6xl text-crayon-yellow top-4 left-4 -rotate-12">star</span>
540
+ <span
541
+ class="material-symbols-outlined modal-decoration text-8xl text-crayon-blue top-12 right-20 opacity-20">cloud</span>
542
+ <span
543
+ class="material-symbols-outlined modal-decoration text-4xl text-crayon-orange bottom-10 left-10 rotate-45">star</span>
544
+ <span
545
+ class="material-symbols-outlined modal-decoration text-7xl text-crayon-purple bottom-4 right-4 -rotate-6 opacity-40">cloud</span>
546
+
547
  <div class="modal-header">
548
+ <div class="flex items-center gap-6">
549
+ <span id="modalTitle" class="text-headline-lg text-[#1e1b4b]">Transcription Result</span>
550
+ <button id="copyBtn" onclick="copyResult()" class="copy-btn">📋 Copy Text</button>
551
+ </div>
552
+ <button class="close-modal" onclick="closeModal()">&times;</button>
553
  </div>
554
+ <div class="modal-body">
555
+ <pre id="resultText"></pre>
 
556
  </div>
557
  </div>
558
  </div>
559
 
560
+ <!-- Status Modal -->
561
+ <div id="statusModal" class="modal">
562
+ <div class="modal-content status-modal-content">
563
+ <div id="statusBg" class="status-modal-bg"></div>
564
+
565
+ <span
566
+ class="material-symbols-outlined modal-decoration text-4xl text-crayon-yellow top-6 right-6 rotate-12">star</span>
567
+ <span
568
+ class="material-symbols-outlined modal-decoration text-6xl text-crayon-blue top-10 left-4 opacity-30">cloud</span>
569
+ <span
570
+ class="material-symbols-outlined modal-decoration text-3xl text-crayon-orange bottom-8 right-10 -rotate-12">star</span>
571
+ <span
572
+ class="material-symbols-outlined modal-decoration text-5xl text-crayon-purple bottom-10 left-8 rotate-6 opacity-30">cloud</span>
573
+
574
+ <div id="statusIconContainer" class="status-icon-container text-crayon-blue">
575
+ <div class="status-icon-bg"></div>
576
+ <span id="statusIcon" class="material-symbols-outlined text-6xl animate-bounce">upload</span>
577
+ </div>
578
+ <h2 id="statusMessage" class="text-headline-lg text-crayon-blue mb-2">Uploading...</h2>
579
+ <p id="statusSubMessage" class="text-body-lg text-[#4b5563]">Processing your request, please wait.</p>
580
+ </div>
581
+ </div>
582
 
583
+ <!-- Application Logic -->
584
+ <script>
585
// --- Configuration ---
// Prefix shared by the task endpoints this page calls.
const API_BASE = '/api';

// --- DOM Elements ---
// Every entry is keyed by the element's id, so UI.<id> holds the node
// returned by document.getElementById('<id>').
const UI = {};
for (const elementId of [
    'uploadZone',
    'fileInput',
    'queueBody',
    'resultModal',
    'statusModal',
    'resultText',
    'modalTitle',
    'statusMessage',
    'statusSubMessage',
    'statusIcon',
    'statusIconContainer',
    'statusBg',
    'healthDot',
    'healthText',
    'apiDocBtn'
]) {
    UI[elementId] = document.getElementById(elementId);
}
606
+
607
+ // --- UI Helpers ---
608
// Repaint the status modal for one of the known states.
//   type   - 'uploading', 'success' or 'error'; any other value leaves the
//            icon container, icon classes and border in their neutral state.
//   msg    - headline text.
//   subMsg - secondary text; a falsy value falls back to the default line.
function updateStatusModal(type, msg, subMsg) {
    const { statusMessage, statusSubMessage, statusIconContainer, statusIcon, statusBg } = UI;

    // Resolve the per-state look first; `null` means "unknown type".
    let look = null;
    switch (type) {
        case 'uploading':
            look = { tone: 'text-crayon-blue', glyph: "upload", border: "#2563eb", bounce: true };
            break;
        case 'success':
            look = { tone: 'text-crayon-green', glyph: "check_circle", border: "#16a34a", bounce: false };
            break;
        case 'error':
            look = { tone: 'text-crayon-red', glyph: "error", border: "#dc2626", bounce: false };
            break;
    }

    // Text plus a reset of everything a previous state may have left behind.
    statusMessage.innerText = msg;
    statusSubMessage.innerText = subMsg ? subMsg : "Processing your request, please wait.";
    statusIconContainer.className = "status-icon-container";
    statusIcon.className = "material-symbols-outlined text-6xl";
    statusBg.style.borderColor = "";

    if (look === null) {
        return;
    }

    statusIconContainer.classList.add(look.tone);
    statusIcon.innerText = look.glyph;
    if (look.bounce) {
        statusIcon.classList.add('animate-bounce');
    }
    statusBg.style.borderColor = look.border;
}
630
 
631
// Hide the result / API-documentation modal by dropping its 'active' class.
function closeModal() {
    const { resultModal } = UI;
    resultModal.classList.remove('active');
}
 
634
 
635
// Copy the text shown in the result modal to the clipboard and flash a
// short confirmation (or failure) label on the copy button.
function copyResult() {
    const btn = document.getElementById('copyBtn');
    const text = UI.resultText.innerText;

    // Remember the idle label once. Reading it on every click would capture
    // the temporary "Copied" label when the user clicks twice within the
    // 2 s window, leaving the button stuck on that label.
    if (!btn.dataset.idleLabel) {
        btn.dataset.idleLabel = btn.innerText;
    }

    const flash = (label) => {
        btn.innerText = label;
        // Only one pending reset at a time, so an older timer cannot fire
        // in the middle of a newer confirmation.
        clearTimeout(copyResult.resetTimer);
        copyResult.resetTimer = setTimeout(() => {
            btn.innerText = btn.dataset.idleLabel;
        }, 2000);
    };

    // navigator.clipboard is unavailable in insecure contexts.
    if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
        console.error("Copy error: Clipboard API unavailable");
        flash('✗ Copy failed');
        return;
    }

    navigator.clipboard.writeText(text).then(
        () => flash('✓ Copied!'),
        (err) => {
            console.error("Copy error:", err);
            flash('✗ Copy failed');
        }
    );
}
644
 
645
+ // --- API Functions ---
646
// Fetch the task list from `${API_BASE}/tasks` and hand it to renderQueue.
// Failures are logged and the list currently on screen is left untouched.
async function loadTasks() {
    try {
        const res = await fetch(`${API_BASE}/tasks`);
        // fetch() only rejects on network failure, so HTTP errors must be
        // detected explicitly before the body is trusted.
        if (!res.ok) {
            throw new Error(`Task list request failed with HTTP ${res.status}`);
        }

        const data = await res.json();
        // renderQueue relies on .length and .map, i.e. on an array.
        if (!Array.isArray(data)) {
            throw new Error("Task list response was not an array");
        }

        renderQueue(data);
    } catch (err) {
        console.error("Load tasks error:", err);
    }
}
655
 
656
// Upload one file to `${API_BASE}/tasks/upload` as the multipart field
// 'audio', reporting progress and outcome through the status modal.
//   file - a File from the picker or a drop; a falsy value is ignored.
async function handleFile(file) {
    if (!file) return;

    // Clear the picker so choosing the same file again still fires `change`
    // (needed to retry a failed upload). The File object already handed to
    // us stays usable after the input is reset.
    UI.fileInput.value = '';

    UI.statusModal.classList.add('active');
    updateStatusModal('uploading', "Uploading...", "Sending file to server...");

    const formData = new FormData();
    formData.append('audio', file);

    // Hide the status modal after `delay` ms, then run the optional follow-up.
    const dismissAfter = (delay, followUp) => {
        setTimeout(() => {
            UI.statusModal.classList.remove('active');
            if (followUp) followUp();
        }, delay);
    };

    try {
        const res = await fetch(`${API_BASE}/tasks/upload`, { method: 'POST', body: formData });
        if (res.ok) {
            updateStatusModal('success', "Success! ✨", "File uploaded and task created.");
            dismissAfter(1200, loadTasks);
        } else {
            updateStatusModal('error', "Upload Failed ❌", "Something went wrong on our end.");
            dismissAfter(2000);
        }
    } catch (err) {
        // fetch() rejects only when the request itself could not be made.
        console.error("Upload error:", err);
        updateStatusModal('error', "Connection Error ⚠️", "Could not reach the server.");
        dismissAfter(2000);
    }
}
683
 
684
// Fetch one task from `${API_BASE}/tasks/<id>` and show its `result` field
// in the result modal. JSON results are pretty-printed; anything else is
// shown as-is. Failures are logged and the modal is not opened.
async function showResult(id) {
    try {
        const res = await fetch(`${API_BASE}/tasks/${encodeURIComponent(id)}`);
        // fetch() resolves for HTTP errors too; do not render an error body
        // as if it were a transcription.
        if (!res.ok) {
            throw new Error(`Task request failed with HTTP ${res.status}`);
        }

        const data = await res.json();
        const raw = data.result;

        let formatted;
        if (raw === undefined || raw === null) {
            // Avoid printing the literal text "undefined" / "null".
            formatted = '';
        } else if (typeof raw === 'string') {
            formatted = raw;
            try {
                formatted = JSON.stringify(JSON.parse(raw), null, 2);
            } catch (e) { /* Not JSON, use raw text */ }
        } else {
            // Already-decoded JSON value (object, array, number, ...).
            formatted = JSON.stringify(raw, null, 2);
        }

        UI.modalTitle.innerText = "Transcription Result";
        UI.resultText.innerText = formatted;
        UI.resultModal.classList.add('active');
    } catch (err) {
        console.error("Show result error:", err);
    }
}
704
 
705
// Poll '/health' and repaint the header badge. The service counts as online
// only when the JSON body has status === 'healthy'; any exception while
// fetching or decoding is shown as a connection error.
async function checkHealth() {
    const badges = {
        online: { dot: 'bg-crayon-green', text: 'text-crayon-green', label: 'Service Online' },
        down: { dot: 'bg-crayon-red', text: 'text-crayon-red', label: 'Service Down' },
        unreachable: { dot: 'bg-crayon-red', text: 'text-crayon-red', label: 'Connection Error' }
    };

    let badge;
    try {
        const response = await fetch('/health');
        const payload = await response.json();
        badge = payload.status === 'healthy' ? badges.online : badges.down;
    } catch (e) {
        badge = badges.unreachable;
    }

    UI.healthDot.className = `w-4 h-4 rounded-full ${badge.dot} shadow-md`;
    UI.healthText.innerText = badge.label;
    UI.healthText.className = `text-headline-md font-bold ${badge.text}`;
}
720
 
721
+ // --- Renderers ---
722
// Render the task list into UI.queueBody.
//   tasks - array of task objects; each one is read for `id`, `filename`,
//           `status` and `progress`.
// The markup is assigned through innerHTML, so every task-supplied value is
// HTML-escaped first: the filename comes from the uploaded file and must not
// be able to inject markup or script.
function renderQueue(tasks) {
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    if (tasks.length === 0) {
        UI.queueBody.innerHTML = '<div class="text-center py-32 text-headline-md text-[#94a3b8] font-bold opacity-60">No tasks found yet...</div>';
        return;
    }

    // Badge colours per status; unknown statuses use the 'pending' look.
    const colors = {
        completed: { text: 'crayon-green', bg: 'bg-[#f0fdf4]' },
        failed: { text: 'crayon-red', bg: 'bg-[#fef2f2]' },
        processing: { text: 'crayon-blue', bg: 'bg-[#eff6ff]' },
        pending: { text: 'crayon-purple', bg: 'bg-[#f5f3ff]' }
    };

    UI.queueBody.innerHTML = tasks.map((t, i) => {
        // Alternate a slight tilt so the cards look hand-placed.
        const rotate = i % 2 === 0 ? 'rotate-[0.3deg]' : '-rotate-[0.3deg]';
        const status = String(t.status || '').toLowerCase();
        // Own-property check so a status such as "constructor" cannot pick
        // up something from Object.prototype.
        const theme = Object.prototype.hasOwnProperty.call(colors, status) ? colors[status] : colors.pending;
        const done = status === 'completed';
        // Coerce to a number in [0, 100]: keeps "undefined%" off the screen
        // and keeps arbitrary text out of the style attribute.
        const progress = Math.min(100, Math.max(0, Number(t.progress) || 0));
        const taskId = String(t.id);

        return `
            <div class="task-card flex items-center p-6 bg-surface hover:border-crayon-purple transition-colors shadow-sm ${rotate}">
                <div class="flex items-center gap-5 w-1/2">
                    <div class="w-16 h-20 border-[3px] border-crayon-blue rounded-xl p-2 bg-surface flex shadow-sm organic-shape rotate-[-3deg]">
                        <div class="w-1/2 h-full border-r-[2px] border-[#adc6ff] flex flex-col gap-1.5 p-0.5">
                            <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
                            <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
                            <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
                        </div>
                        <div class="w-1/2 h-full flex flex-col gap-1.5 p-0.5 relative">
                            <div class="w-full h-2 bg-[#adc6ff] rounded-sm"></div>
                            <div class="mt-auto flex gap-1.5 bottom-1 absolute">
                                <div class="w-2.5 h-2.5 bg-crayon-yellow rounded-sm"></div>
                                <div class="w-2.5 h-2.5 bg-crayon-green rounded-sm"></div>
                            </div>
                        </div>
                    </div>
                    <div class="flex flex-col">
                        <span class="text-headline-md text-[#1A1A1A] leading-tight font-bold mb-1">${escapeHtml(t.filename)}</span>
                        <div class="text-label-sm text-[#94a3b8] font-bold">${escapeHtml(taskId.substring(0, 12))}</div>
                    </div>
                </div>
                <div class="w-1/6 flex justify-center">
                    <div class="px-4 py-2 ${theme.bg} border-[3px] border-${theme.text} text-${theme.text} font-bold rounded-2xl uppercase tracking-tight crayon-filter">
                        ${escapeHtml(status.replace(/_/g, ' '))}
                    </div>
                </div>
                <div class="w-1/4 flex items-center justify-center gap-4">
                    <div class="flex-1 h-6 border-[3px] border-[#adc6ff] rounded-full overflow-hidden bg-surface p-[2px] crayon-filter">
                        <div class="h-full rounded-full progress-fill shadow-sm" style="width:${progress}%"></div>
                    </div>
                    <span class="text-headline-md text-2xl text-[#1A1A1A] font-bold w-12 text-right">${done ? '' : progress + '%'}</span>
                </div>
                <div class="w-[10%] flex justify-center">
                    ${done ? `
                    <button data-task-id="${escapeHtml(taskId)}" onclick="showResult(this.dataset.taskId)" class="flex items-center gap-2 px-5 py-2.5 bg-white border-[3px] border-crayon-blue text-crayon-blue font-bold rounded-2xl hover:bg-crayon-blue hover:text-white transition-all shadow-sm crayon-filter">
                        <span class="material-symbols-outlined text-2xl">visibility</span>
                        VIEW
                    </button>
                    ` : '—'}
                </div>
            </div>
        `;
    }).join('');
}
784
 
785
// --- Event Listeners ---
// Clicking the upload zone opens the hidden file picker.
UI.uploadZone.onclick = function () {
    UI.fileInput.click();
};

// preventDefault on dragover is what allows a drop; the 'dragging' class is
// the visual highlight while a drag is over the zone.
UI.uploadZone.ondragover = function (event) {
    event.preventDefault();
    UI.uploadZone.classList.add('dragging');
};

UI.uploadZone.ondragleave = function () {
    UI.uploadZone.classList.remove('dragging');
};

// Only the first dropped file is uploaded.
UI.uploadZone.ondrop = function (event) {
    event.preventDefault();
    UI.uploadZone.classList.remove('dragging');
    const [droppedFile] = [event.dataTransfer.files[0]];
    handleFile(droppedFile);
};

UI.fileInput.onchange = function (event) {
    const pickedFile = event.target.files[0];
    handleFile(pickedFile);
};

// The API documentation reuses the result modal: a static description of the
// endpoints is shown as pretty-printed JSON.
UI.apiDocBtn.onclick = function () {
    const origin = window.location.origin;
    const endpoints = [
        { method: "POST", path: "/api/tasks/upload", desc: "Upload audio for STT" },
        { method: "GET", path: "/api/tasks", desc: "List all tasks" },
        { method: "GET", path: "/api/tasks/{task_id}", desc: "Get STT result" },
        { method: "GET", path: "/health", desc: "Service health" }
    ];
    const apiDoc = {
        base_url: origin,
        endpoints: endpoints,
        example_usage: `curl -X POST -F 'audio=@file.wav' ${origin}/api/tasks/upload`
    };

    UI.resultText.innerText = JSON.stringify(apiDoc, null, 2);
    UI.modalTitle.innerText = "API Documentation";
    UI.resultModal.classList.add('active');
};
810
+
811
// --- Lifecycle ---
// Run both checks once right away, then keep polling. Without the immediate
// checkHealth() the header would show its hard-coded "Service Online" label
// for the first 10 seconds, whatever the real state is.
loadTasks();
checkHealth();
setInterval(loadTasks, 5000);
setInterval(checkHealth, 10000);
815
  </script>
816
  </body>
817
 
pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "stt-backend"
7
+ version = "2.0.0"
8
+ description = "FastAPI backend for STT"
9
+ dependencies = [
10
+ "fastapi",
11
+ "uvicorn",
12
+ "aiosqlite",
13
+ "aiofiles",
14
+ "python-multipart",
15
+ "custom_logger @ git+https://github.com/jebin2/custom_logger.git",
16
+ "stt-runner[parakeet] @ git+https://github.com/jebin2/STT.git"
17
+ ]
18
+
19
+ [project.scripts]
20
+ stt-backend = "app.main:app"
21
+
22
+ [tool.setuptools.packages.find]
23
+ include = ["app*"]
requirements.txt DELETED
@@ -1,6 +0,0 @@
1
- Flask==3.0.0
2
- flask-cors==4.0.0
3
- werkzeug==3.0.1
4
-
5
- #STT
6
- git+https://github.com/jebin2/STT.git#egg=stt-runner[parakeet]
 
 
 
 
 
 
 
run.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ import os
3
+
4
def main():
    """Serve ``app.main:app`` with uvicorn on all interfaces.

    The port is read from the ``PORT`` environment variable and falls back
    to 7860 when it is unset.
    """
    listen_port = int(os.environ.get("PORT", 7860))
    # NOTE(review): reload=True enables uvicorn's auto-reload; confirm this
    # is wanted outside local development.
    uvicorn.run("app.main:app", host="0.0.0.0", port=listen_port, reload=True)


if __name__ == "__main__":
    main()
uploads/e31e8563-3860-440c-aedc-d7916aa5243a_tts_498cf025-18a6-40bd-8fff-02c7d35852b1.wav ADDED
Binary file (89.3 kB). View file
 
worker.py DELETED
@@ -1,138 +0,0 @@
1
- import sqlite3
2
- import time
3
- import os
4
- import subprocess
5
- import json
6
- import shlex
7
- from datetime import datetime
8
-
9
# Directory the STT command is run from; the transcription result is read
# from "temp_dir/output_transcription.json" underneath it.
- CWD = "./"
10
# Despite the name this is not a Python interpreter path: it is the command
# that process_audio() executes with --input/--model arguments.
- PYTHON_PATH = "stt-transcribe"
11
# Value passed to the command's --model flag.
# NOTE(review): the spelling "fasterwhispher" looks like a typo - confirm
# against the model names the STT command accepts before changing it.
- STT_MODEL_NAME = "fasterwhispher"
12
# Delay used by worker_loop() when no file is pending and after a worker error.
- POLL_INTERVAL = 3 # seconds
13
-
14
def process_audio(file_id, filepath):
    """Transcribe one audio file with the external STT command.

    Runs ``PYTHON_PATH --input <absolute path> --model STT_MODEL_NAME`` with
    ``CWD`` as working directory, then reads
    ``temp_dir/output_transcription.json`` (relative to ``CWD``), which this
    code expects the command to have written.

    Args:
        file_id: Identifier of the file; only used in the error log line.
        filepath: Path of the audio file; converted to an absolute path.

    Returns:
        ``(caption, None)`` on success or ``(None, error_message)`` when
        anything fails (non-zero exit status, missing/invalid output, ...).
    """
    output_path = os.path.join(CWD, 'temp_dir', 'output_transcription.json')

    try:
        audio_path = os.path.abspath(filepath)
        print(f"🔄 Running STT on: {audio_path}")

        # Remove a result left behind by an earlier run: otherwise a command
        # that exits 0 without writing output would make us return the
        # previous file's transcription for this one.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        # Pass an argument list instead of a shell string: the file path is
        # handed to the command verbatim and no shell ever interprets it.
        # shlex.split keeps a multi-word PYTHON_PATH working as it did when
        # the value was interpolated into a shell command line.
        command = [
            *shlex.split(PYTHON_PATH),
            '--input', audio_path,
            '--model', STT_MODEL_NAME,
        ]

        subprocess.run(
            command,
            check=True,  # non-zero exit -> CalledProcessError -> reported below
            cwd=CWD,
            env={
                **os.environ,
                'PYTHONUNBUFFERED': '1',
                'CUDA_LAUNCH_BLOCKING': '1',
                'USE_CPU_IF_POSSIBLE': 'true',
            },
        )

        # Read transcription result
        with open(output_path, 'r', encoding='utf-8') as file:
            result = json.load(file)

        # Prefer the known text fields; fall back to the raw payload so that
        # an unexpected shape (including a non-dict JSON value) still yields
        # a caption instead of an AttributeError.
        if isinstance(result, dict):
            caption = result.get('text', '') or result.get('transcription', '') or str(result)
        else:
            caption = str(result)

        return caption, None

    except Exception as e:
        # Deliberately broad: the worker loop must keep running whatever the
        # failure is; the message is stored with the file's 'failed' status.
        print(f"❌ Error processing file {file_id}: {e}")
        return None, str(e)
49
-
50
def update_status(file_id, status, caption=None, error=None):
    """Persist a new status for one row of ``audio_files``.

    Args:
        file_id: Primary key (``id``) of the row to update.
        status: New status value. ``'completed'`` and ``'failed'`` also write
            ``caption`` and stamp ``processed_at`` with the current local
            time; any other value updates the ``status`` column only.
        caption: Transcription stored when ``status == 'completed'``.
        error: Error text stored as ``"Error: <error>"`` in the ``caption``
            column when ``status == 'failed'``.
    """
    conn = sqlite3.connect('audio_captions.db')
    try:
        if status in ('completed', 'failed'):
            # Both terminal states write the same columns; only the text
            # placed in `caption` differs.
            stored_caption = caption if status == 'completed' else f"Error: {error}"
            conn.execute(
                '''UPDATE audio_files
                   SET status = ?, caption = ?, processed_at = ?
                   WHERE id = ?''',
                (status, stored_caption, datetime.now().isoformat(), file_id),
            )
        else:
            conn.execute(
                'UPDATE audio_files SET status = ? WHERE id = ?',
                (status, file_id),
            )
        conn.commit()
    finally:
        # Always release the connection, even when the UPDATE or the commit
        # raises (e.g. "database is locked"); otherwise the handle leaks.
        conn.close()
70
-
71
def _fetch_next_pending():
    """Return the oldest ``not_started`` row of ``audio_files``, or ``None``."""
    conn = sqlite3.connect('audio_captions.db')
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute('''SELECT * FROM audio_files
                     WHERE status = 'not_started'
                     ORDER BY created_at ASC
                     LIMIT 1''').fetchone()
    finally:
        # Close even when the query raises so a failing poll cannot leak one
        # connection per iteration.
        conn.close()


def worker_loop():
    """Poll the database forever and transcribe pending audio files.

    Each iteration takes the oldest ``not_started`` row, marks it
    ``processing``, runs ``process_audio`` on it and stores the outcome via
    ``update_status``. The audio file is deleted after a successful run and
    kept after a failure. When nothing is pending, or when an unexpected
    error occurs, the loop sleeps ``POLL_INTERVAL`` seconds and tries again.
    This function never returns.
    """
    print("🤖 STT Worker started. Monitoring for new audio files...")
    print("🗑️ Audio files will be deleted after successful processing\n")

    while True:
        try:
            row = _fetch_next_pending()

            if row is None:
                # No files to process, sleep for a bit
                time.sleep(POLL_INTERVAL)
                continue

            file_id = row['id']
            filepath = row['filepath']
            filename = row['filename']

            print(f"\n{'='*60}")
            print(f"🎵 Processing: {filename}")
            print(f"📝 ID: {file_id}")
            print(f"{'='*60}")

            # Mark the row first so it is no longer selected as pending.
            update_status(file_id, 'processing')

            caption, error = process_audio(file_id, filepath)

            if caption:
                print(f"✅ Successfully processed: {filename}")
                print(f"📄 Caption preview: {caption[:100]}...")
                update_status(file_id, 'completed', caption=caption)

                # Delete the audio file after successful processing. Try the
                # removal directly instead of checking existence first, so a
                # file that vanishes in between is not treated as an error.
                try:
                    os.remove(filepath)
                except FileNotFoundError:
                    pass
                else:
                    print(f"🗑️ Deleted audio file: {filepath}")
            else:
                print(f"❌ Failed to process: {filename}")
                print(f"Error: {error}")
                update_status(file_id, 'failed', error=error)
                # Don't delete file on failure (for debugging)

        except Exception as e:
            # Top-level boundary of the worker: log, back off, keep going.
            print(f"⚠️ Worker error: {str(e)}")
            time.sleep(POLL_INTERVAL)
126
-
127
if __name__ == '__main__':
    # Standalone entry point. The worker does not create the schema itself,
    # so it only starts when the database file is already present.
    if os.path.exists('audio_captions.db'):
        rule = "=" * 60
        print("\n" + rule)
        print("🚀 Starting STT Worker (Standalone Mode)")
        print(rule)
        print("⚠️ Note: Worker is now embedded in app.py")
        print("⚠️ This standalone mode is for testing/debugging only")
        print(rule + "\n")
        worker_loop()
    else:
        print("❌ Database not found. Please run app.py first to initialize.")