github-actions[bot] committed on
Commit
f145097
·
1 Parent(s): c4576b6

Auto-deploy from GitHub: 898f681ae67425055f39b96fde956658f2fe0d29

Browse files
Files changed (7) hide show
  1. .gitattributes +0 -35
  2. Dockerfile +34 -0
  3. README.md +29 -5
  4. app.py +124 -0
  5. index.html +546 -0
  6. requirements.txt +6 -0
  7. worker.py +125 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Set working directory
4
+ WORKDIR /app
5
+
6
+ # Install system dependencies
7
+ RUN apt-get update && apt-get install -y \
8
+ ffmpeg \
9
+ git \
10
+ curl \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ # Copy requirements first for better caching
14
+ COPY requirements.txt .
15
+
16
+ # Install Python dependencies
17
+ RUN pip install --no-cache-dir -r requirements.txt
18
+
19
+ # Copy application files
20
+ COPY . .
21
+
22
+ # Create necessary directories
23
+ RUN mkdir -p uploads temp_dir
24
+
25
+ # Expose port
26
+ EXPOSE 7860
27
+
28
+ # Create a startup script
29
+ RUN echo '#!/bin/bash\n\
30
+ python worker.py &\n\
31
+ python app.py' > /app/start.sh && chmod +x /app/start.sh
32
+
33
+ # Run the application
34
+ CMD ["/app/start.sh"]
README.md CHANGED
@@ -1,10 +1,34 @@
1
  ---
2
- title: STT
3
- emoji: 📚
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: STT Audio Caption Generator
3
+ emoji: 🎵
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # Audio Caption Generator
12
+
13
+ A Python-based audio transcription service with a neobrutalist web interface.
14
+
15
+ ## Features
16
+ - 🎵 Audio file upload via REST API
17
+ - 🤖 Automatic STT processing using faster-whisper
18
+ - 💾 SQLite database for queue management
19
+ - 🎨 Neobrutalist UI with smooth animations
20
+ - 🔄 Real-time status updates
21
+
22
+ ## Usage
23
+ Access the web interface at the Space URL above.
24
+
25
+ ## API Endpoints
26
+ - POST `/api/upload` - Upload audio file
27
+ - GET `/api/files` - Get all files
28
+ - GET `/api/files/<id>` - Get specific file
29
+
30
+ ## Supported Formats
31
+ WAV, MP3, FLAC, OGG, M4A, AAC
32
+
33
+ ---
34
+ *Auto-deployed from GitHub*
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory
2
+ from flask_cors import CORS
3
+ import sqlite3
4
+ import os
5
+ import uuid
6
+ from datetime import datetime
7
+ from werkzeug.utils import secure_filename
8
+
9
+ app = Flask(__name__)
10
+ CORS(app)
11
+
12
+ UPLOAD_FOLDER = 'uploads'
13
+ ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'ogg', 'm4a', 'aac'}
14
+
15
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
16
+ os.makedirs('temp_dir', exist_ok=True)
17
+
18
def init_db():
    """Create the ``audio_files`` queue table if it does not exist yet.

    Connects to ``audio_captions.db`` (relative to the current working
    directory) and issues a ``CREATE TABLE IF NOT EXISTS``, so calling this
    repeatedly is harmless. The connection is closed even when the statement
    fails.
    """
    conn = sqlite3.connect('audio_captions.db')
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS audio_files
                        (id TEXT PRIMARY KEY,
                         filename TEXT NOT NULL,
                         filepath TEXT NOT NULL,
                         status TEXT NOT NULL,
                         caption TEXT,
                         created_at TEXT NOT NULL,
                         processed_at TEXT)''')
        conn.commit()
    finally:
        # Release the handle on the error path too.
        conn.close()
31
+
32
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
34
+
35
@app.route('/')
def index():
    """Serve the web UI page ``index.html`` from directory ``'.'``."""
    page_dir, page_name = '.', 'index.html'
    return send_from_directory(page_dir, page_name)
38
+
39
@app.route('/api/upload', methods=['POST'])
def upload_audio():
    """Store an uploaded audio file and queue it for transcription.

    Expects a multipart form field named ``audio``. The file is saved under
    ``UPLOAD_FOLDER`` as ``<uuid>_<sanitised name>`` and a row with status
    ``not_started`` is inserted into ``audio_files``.

    Returns:
        ``201`` with ``id``, ``filename``, ``status`` and ``message`` on
        success; ``400`` with an ``error`` message when the field is missing,
        the filename is empty, or the extension is not allowed.

    Raises:
        sqlite3.Error: If the queue row cannot be written. The saved upload
            is removed first so no orphan file is left behind.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    file = request.files['audio']

    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': 'Invalid file type'}), 400

    file_id = str(uuid.uuid4())
    filename = secure_filename(file.filename)
    filepath = os.path.join(UPLOAD_FOLDER, f"{file_id}_{filename}")
    file.save(filepath)

    try:
        conn = sqlite3.connect('audio_captions.db')
        try:
            conn.execute('''INSERT INTO audio_files
                            (id, filename, filepath, status, created_at)
                            VALUES (?, ?, ?, ?, ?)''',
                         (file_id, filename, filepath, 'not_started',
                          datetime.now().isoformat()))
            conn.commit()
        finally:
            conn.close()
    except sqlite3.Error:
        # Without a queue row nothing would ever process or reference this
        # file, so remove it before propagating the error.
        try:
            os.remove(filepath)
        except OSError:
            pass
        raise

    return jsonify({
        'id': file_id,
        'filename': filename,
        'status': 'not_started',
        'message': 'File uploaded successfully'
    }), 201
72
+
73
@app.route('/api/files', methods=['GET'])
def get_files():
    """Return all rows of ``audio_files`` as a JSON array, newest first.

    Each element exposes ``id``, ``filename``, ``status``, ``caption``,
    ``created_at`` and ``processed_at``; the stored ``filepath`` is omitted.
    """
    db = sqlite3.connect('audio_captions.db')
    db.row_factory = sqlite3.Row
    cursor = db.cursor()
    cursor.execute('SELECT * FROM audio_files ORDER BY created_at DESC')
    records = cursor.fetchall()
    db.close()

    exposed = ('id', 'filename', 'status', 'caption', 'created_at', 'processed_at')
    return jsonify([{key: record[key] for key in exposed} for record in records])
94
+
95
@app.route('/api/files/<file_id>', methods=['GET'])
def get_file(file_id):
    """Return one queue entry as JSON, or a 404 error payload for an unknown id.

    The payload exposes ``id``, ``filename``, ``status``, ``caption``,
    ``created_at`` and ``processed_at``.
    """
    db = sqlite3.connect('audio_captions.db')
    db.row_factory = sqlite3.Row
    cursor = db.cursor()
    cursor.execute('SELECT * FROM audio_files WHERE id = ?', (file_id,))
    record = cursor.fetchone()
    db.close()

    if record is not None:
        exposed = ('id', 'filename', 'status', 'caption', 'created_at', 'processed_at')
        return jsonify({key: record[key] for key in exposed})

    return jsonify({'error': 'File not found'}), 404
115
+
116
@app.route('/health', methods=['GET'])
def health():
    """Report a fixed "healthy" status payload for liveness checks."""
    payload = {'status': 'healthy', 'service': 'audio-caption-generator'}
    return jsonify(payload)
119
+
120
if __name__ == '__main__':
    # Make sure the queue table exists before the server starts.
    init_db()
    # Honour PORT when set (kept for Hugging Face compatibility); default 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)), debug=False)
index.html ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Audio Caption Generator</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ :root {
15
+ --bg: #0a0e27;
16
+ --surface: #141b3d;
17
+ --primary: #00ff88;
18
+ --secondary: #ff00ff;
19
+ --accent: #00d4ff;
20
+ --error: #ff1744;
21
+ --text: #ffffff;
22
+ --border: 4px;
23
+ }
24
+
25
+ body {
26
+ font-family: 'Space Grotesk', 'Courier New', monospace;
27
+ background: var(--bg);
28
+ color: var(--text);
29
+ min-height: 100vh;
30
+ overflow-x: hidden;
31
+ position: relative;
32
+ }
33
+
34
+ body::before {
35
+ content: '';
36
+ position: fixed;
37
+ top: 0;
38
+ left: 0;
39
+ width: 100%;
40
+ height: 100%;
41
+ background:
42
+ radial-gradient(circle at 20% 50%, rgba(0, 255, 136, 0.1) 0%, transparent 50%),
43
+ radial-gradient(circle at 80% 80%, rgba(255, 0, 255, 0.1) 0%, transparent 50%),
44
+ radial-gradient(circle at 40% 20%, rgba(0, 212, 255, 0.1) 0%, transparent 50%);
45
+ pointer-events: none;
46
+ z-index: 0;
47
+ }
48
+
49
+ .container {
50
+ max-width: 1400px;
51
+ margin: 0 auto;
52
+ padding: 2rem;
53
+ position: relative;
54
+ z-index: 1;
55
+ }
56
+
57
+ header {
58
+ text-align: center;
59
+ margin-bottom: 3rem;
60
+ animation: slideDown 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55);
61
+ }
62
+
63
+ @keyframes slideDown {
64
+ from {
65
+ opacity: 0;
66
+ transform: translateY(-50px);
67
+ }
68
+ to {
69
+ opacity: 1;
70
+ transform: translateY(0);
71
+ }
72
+ }
73
+
74
+ h1 {
75
+ font-size: clamp(2rem, 5vw, 4rem);
76
+ font-weight: 900;
77
+ background: linear-gradient(135deg, var(--primary) 0%, var(--accent) 50%, var(--secondary) 100%);
78
+ -webkit-background-clip: text;
79
+ -webkit-text-fill-color: transparent;
80
+ background-clip: text;
81
+ text-transform: uppercase;
82
+ letter-spacing: -2px;
83
+ margin-bottom: 1rem;
84
+ position: relative;
85
+ display: inline-block;
86
+ }
87
+
88
+ h1::after {
89
+ content: '';
90
+ position: absolute;
91
+ bottom: -10px;
92
+ left: 50%;
93
+ transform: translateX(-50%);
94
+ width: 60%;
95
+ height: 6px;
96
+ background: linear-gradient(90deg, transparent, var(--primary), transparent);
97
+ animation: glow 2s ease-in-out infinite;
98
+ }
99
+
100
+ @keyframes glow {
101
+ 0%, 100% { opacity: 0.5; }
102
+ 50% { opacity: 1; }
103
+ }
104
+
105
+ .subtitle {
106
+ font-size: 1.2rem;
107
+ color: var(--accent);
108
+ letter-spacing: 2px;
109
+ }
110
+
111
+ .upload-section {
112
+ background: var(--surface);
113
+ border: var(--border) solid var(--primary);
114
+ box-shadow: 8px 8px 0 var(--primary);
115
+ padding: 2rem;
116
+ margin-bottom: 3rem;
117
+ position: relative;
118
+ transition: all 0.3s ease;
119
+ animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.2s both;
120
+ }
121
+
122
+ @keyframes slideUp {
123
+ from {
124
+ opacity: 0;
125
+ transform: translateY(50px);
126
+ }
127
+ to {
128
+ opacity: 1;
129
+ transform: translateY(0);
130
+ }
131
+ }
132
+
133
+ .upload-section:hover {
134
+ transform: translate(-2px, -2px);
135
+ box-shadow: 12px 12px 0 var(--primary);
136
+ }
137
+
138
+ .upload-zone {
139
+ border: 3px dashed var(--accent);
140
+ padding: 3rem;
141
+ text-align: center;
142
+ cursor: pointer;
143
+ transition: all 0.3s ease;
144
+ background: rgba(0, 212, 255, 0.05);
145
+ }
146
+
147
+ .upload-zone:hover {
148
+ background: rgba(0, 212, 255, 0.1);
149
+ border-color: var(--primary);
150
+ }
151
+
152
+ .upload-zone.dragging {
153
+ background: rgba(0, 255, 136, 0.2);
154
+ border-color: var(--primary);
155
+ transform: scale(1.02);
156
+ }
157
+
158
+ input[type="file"] {
159
+ display: none;
160
+ }
161
+
162
+ .btn {
163
+ background: var(--primary);
164
+ color: var(--bg);
165
+ border: var(--border) solid var(--bg);
166
+ padding: 1rem 2rem;
167
+ font-size: 1.1rem;
168
+ font-weight: 900;
169
+ text-transform: uppercase;
170
+ cursor: pointer;
171
+ transition: all 0.2s ease;
172
+ box-shadow: 4px 4px 0 var(--bg);
173
+ letter-spacing: 1px;
174
+ }
175
+
176
+ .btn:hover {
177
+ transform: translate(-2px, -2px);
178
+ box-shadow: 6px 6px 0 var(--bg);
179
+ }
180
+
181
+ .btn:active {
182
+ transform: translate(2px, 2px);
183
+ box-shadow: 2px 2px 0 var(--bg);
184
+ }
185
+
186
+ .btn-secondary {
187
+ background: var(--accent);
188
+ }
189
+
190
+ .table-section {
191
+ animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.4s both;
192
+ }
193
+
194
+ .table-wrapper {
195
+ overflow-x: auto;
196
+ background: var(--surface);
197
+ border: var(--border) solid var(--secondary);
198
+ box-shadow: 8px 8px 0 var(--secondary);
199
+ }
200
+
201
+ table {
202
+ width: 100%;
203
+ border-collapse: collapse;
204
+ }
205
+
206
+ thead {
207
+ background: linear-gradient(135deg, var(--primary), var(--accent));
208
+ }
209
+
210
+ th {
211
+ padding: 1.5rem 1rem;
212
+ text-align: left;
213
+ font-weight: 900;
214
+ text-transform: uppercase;
215
+ letter-spacing: 1px;
216
+ color: var(--bg);
217
+ border-right: 3px solid var(--bg);
218
+ }
219
+
220
+ th:last-child {
221
+ border-right: none;
222
+ }
223
+
224
+ tbody tr {
225
+ border-bottom: 2px solid rgba(0, 212, 255, 0.2);
226
+ transition: all 0.3s ease;
227
+ animation: fadeIn 0.5s ease;
228
+ }
229
+
230
+ @keyframes fadeIn {
231
+ from { opacity: 0; }
232
+ to { opacity: 1; }
233
+ }
234
+
235
+ tbody tr:hover {
236
+ background: rgba(0, 255, 136, 0.1);
237
+ transform: translateX(5px);
238
+ }
239
+
240
+ td {
241
+ padding: 1.5rem 1rem;
242
+ color: var(--text);
243
+ }
244
+
245
+ .status {
246
+ display: inline-block;
247
+ padding: 0.5rem 1rem;
248
+ border: 3px solid;
249
+ font-weight: 900;
250
+ text-transform: uppercase;
251
+ font-size: 0.85rem;
252
+ letter-spacing: 1px;
253
+ }
254
+
255
+ .status-not_started {
256
+ background: var(--bg);
257
+ border-color: var(--accent);
258
+ color: var(--accent);
259
+ }
260
+
261
+ .status-processing {
262
+ background: var(--bg);
263
+ border-color: var(--primary);
264
+ color: var(--primary);
265
+ animation: pulse 1.5s ease-in-out infinite;
266
+ }
267
+
268
+ @keyframes pulse {
269
+ 0%, 100% { opacity: 1; }
270
+ 50% { opacity: 0.6; }
271
+ }
272
+
273
+ .status-completed {
274
+ background: var(--primary);
275
+ border-color: var(--primary);
276
+ color: var(--bg);
277
+ }
278
+
279
+ .status-failed {
280
+ background: var(--error);
281
+ border-color: var(--error);
282
+ color: var(--text);
283
+ }
284
+
285
+ .caption-cell {
286
+ max-width: 400px;
287
+ overflow: hidden;
288
+ text-overflow: ellipsis;
289
+ white-space: nowrap;
290
+ }
291
+
292
+ .caption-cell:hover {
293
+ white-space: normal;
294
+ cursor: pointer;
295
+ }
296
+
297
+ .empty-state {
298
+ text-align: center;
299
+ padding: 4rem 2rem;
300
+ color: var(--accent);
301
+ font-size: 1.2rem;
302
+ }
303
+
304
+ .refresh-btn {
305
+ position: fixed;
306
+ bottom: 2rem;
307
+ right: 2rem;
308
+ width: 60px;
309
+ height: 60px;
310
+ border-radius: 50%;
311
+ background: var(--secondary);
312
+ border: var(--border) solid var(--bg);
313
+ box-shadow: 4px 4px 0 var(--bg);
314
+ cursor: pointer;
315
+ transition: all 0.3s ease;
316
+ display: flex;
317
+ align-items: center;
318
+ justify-content: center;
319
+ font-size: 1.5rem;
320
+ z-index: 1000;
321
+ }
322
+
323
+ .refresh-btn:hover {
324
+ transform: rotate(180deg) scale(1.1);
325
+ box-shadow: 6px 6px 0 var(--bg);
326
+ }
327
+
328
+ @media (max-width: 768px) {
329
+ .container {
330
+ padding: 1rem;
331
+ }
332
+
333
+ .upload-section, .table-wrapper {
334
+ box-shadow: 4px 4px 0 var(--primary);
335
+ }
336
+
337
+ th, td {
338
+ padding: 1rem 0.5rem;
339
+ font-size: 0.9rem;
340
+ }
341
+
342
+ .caption-cell {
343
+ max-width: 200px;
344
+ }
345
+ }
346
+
347
+ .notification {
348
+ position: fixed;
349
+ top: 2rem;
350
+ right: 2rem;
351
+ padding: 1.5rem 2rem;
352
+ background: var(--primary);
353
+ color: var(--bg);
354
+ border: var(--border) solid var(--bg);
355
+ box-shadow: 6px 6px 0 var(--bg);
356
+ font-weight: 900;
357
+ z-index: 2000;
358
+ animation: slideInRight 0.5s ease, slideOutRight 0.5s ease 3.5s;
359
+ }
360
+
361
+ @keyframes slideInRight {
362
+ from {
363
+ transform: translateX(400px);
364
+ opacity: 0;
365
+ }
366
+ to {
367
+ transform: translateX(0);
368
+ opacity: 1;
369
+ }
370
+ }
371
+
372
+ @keyframes slideOutRight {
373
+ to {
374
+ transform: translateX(400px);
375
+ opacity: 0;
376
+ }
377
+ }
378
+ </style>
379
+ </head>
380
+ <body>
381
+ <div class="container">
382
+ <header>
383
+ <h1>Audio Caption Generator</h1>
384
+ <p class="subtitle">Transcribe • Process • Analyze</p>
385
+ </header>
386
+
387
+ <div class="upload-section">
388
+ <h2 style="margin-bottom: 1.5rem; color: var(--primary);">Upload Audio File</h2>
389
+ <div class="upload-zone" id="uploadZone">
390
+ <p style="font-size: 1.2rem; margin-bottom: 1rem;">📁 Drop audio file here or click to browse</p>
391
+ <p style="color: var(--accent); font-size: 0.9rem;">Supported: WAV, MP3, FLAC, OGG, M4A, AAC</p>
392
+ <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.ogg,.m4a,.aac">
393
+ </div>
394
+ <button class="btn" id="uploadBtn" style="margin-top: 1.5rem; width: 100%;">
395
+ 🚀 Upload & Process
396
+ </button>
397
+ </div>
398
+
399
+ <div class="table-section">
400
+ <h2 style="margin-bottom: 1.5rem; color: var(--secondary);">Processing Queue</h2>
401
+ <div class="table-wrapper">
402
+ <table>
403
+ <thead>
404
+ <tr>
405
+ <th>Filename</th>
406
+ <th>Status</th>
407
+ <th>Caption</th>
408
+ <th>Created</th>
409
+ <th>Processed</th>
410
+ </tr>
411
+ </thead>
412
+ <tbody id="filesTable">
413
+ <tr>
414
+ <td colspan="5" class="empty-state">No files uploaded yet. Start by uploading an audio file!</td>
415
+ </tr>
416
+ </tbody>
417
+ </table>
418
+ </div>
419
+ </div>
420
+ </div>
421
+
422
+ <button class="refresh-btn" id="refreshBtn" title="Refresh">🔄</button>
423
+
424
+ <script>
425
// Relative base path: requests go to whichever host and port served this page,
// so the UI works both locally and when deployed behind another hostname.
const API_URL = '/api';
426
+ let selectedFile = null;
427
+
428
+ // Upload zone interactions
429
+ const uploadZone = document.getElementById('uploadZone');
430
+ const fileInput = document.getElementById('audioFile');
431
+
432
+ uploadZone.addEventListener('click', () => fileInput.click());
433
+
434
+ uploadZone.addEventListener('dragover', (e) => {
435
+ e.preventDefault();
436
+ uploadZone.classList.add('dragging');
437
+ });
438
+
439
+ uploadZone.addEventListener('dragleave', () => {
440
+ uploadZone.classList.remove('dragging');
441
+ });
442
+
443
+ uploadZone.addEventListener('drop', (e) => {
444
+ e.preventDefault();
445
+ uploadZone.classList.remove('dragging');
446
+ const file = e.dataTransfer.files[0];
447
+ if (file) {
448
+ fileInput.files = e.dataTransfer.files;
449
+ selectedFile = file;
450
+ showNotification(`Selected: ${file.name}`);
451
+ }
452
+ });
453
+
454
+ fileInput.addEventListener('change', (e) => {
455
+ selectedFile = e.target.files[0];
456
+ if (selectedFile) {
457
+ showNotification(`Selected: ${selectedFile.name}`);
458
+ }
459
+ });
460
+
461
+ // Upload button
462
+ document.getElementById('uploadBtn').addEventListener('click', async () => {
463
+ if (!selectedFile) {
464
+ showNotification('Please select a file first!', 'error');
465
+ return;
466
+ }
467
+
468
+ const formData = new FormData();
469
+ formData.append('audio', selectedFile);
470
+
471
+ try {
472
+ const response = await fetch(`${API_URL}/upload`, {
473
+ method: 'POST',
474
+ body: formData
475
+ });
476
+
477
+ const data = await response.json();
478
+
479
+ if (response.ok) {
480
+ showNotification('File uploaded successfully! 🎉');
481
+ selectedFile = null;
482
+ fileInput.value = '';
483
+ loadFiles();
484
+ } else {
485
+ showNotification(data.error || 'Upload failed', 'error');
486
+ }
487
+ } catch (error) {
488
+ showNotification('Network error: ' + error.message, 'error');
489
+ }
490
+ });
491
+
492
+ // Load files
493
// Escape a value for safe interpolation into HTML text content and
// double- or single-quoted attribute values.
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Fetch the processing queue from the API and re-render the table body.
// Failures are logged to the console and leave the current table untouched.
async function loadFiles() {
    try {
        const response = await fetch(`${API_URL}/files`);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const files = await response.json();

        const tbody = document.getElementById('filesTable');

        if (files.length === 0) {
            tbody.innerHTML = '<tr><td colspan="5" class="empty-state">No files uploaded yet. Start by uploading an audio file!</td></tr>';
            return;
        }

        // Filenames and captions are user- or model-supplied text, so every
        // interpolated value is escaped before it reaches innerHTML.
        tbody.innerHTML = files.map(file => `
            <tr>
                <td><strong>${escapeHtml(file.filename)}</strong></td>
                <td><span class="status status-${escapeHtml(file.status)}">${escapeHtml(file.status.replace('_', ' '))}</span></td>
                <td class="caption-cell" title="${escapeHtml(file.caption || 'N/A')}">${escapeHtml(file.caption || '—')}</td>
                <td>${escapeHtml(new Date(file.created_at).toLocaleString())}</td>
                <td>${file.processed_at ? escapeHtml(new Date(file.processed_at).toLocaleString()) : '—'}</td>
            </tr>
        `).join('');
    } catch (error) {
        console.error('Error loading files:', error);
    }
}
518
+
519
+ // Refresh button
520
+ document.getElementById('refreshBtn').addEventListener('click', loadFiles);
521
+
522
+ // Auto-refresh every 3 seconds
523
+ setInterval(loadFiles, 3000);
524
+
525
+ // Show notification
526
// Show a transient toast with the given message; it is removed from the DOM
// after four seconds. Passing type 'error' switches the background to the
// error colour.
function showNotification(message, type = 'success') {
    const toast = document.createElement('div');
    toast.className = 'notification';
    toast.textContent = message;

    if (type === 'error') {
        toast.style.background = 'var(--error)';
    }

    document.body.appendChild(toast);
    setTimeout(() => toast.remove(), 4000);
}
541
+
542
+ // Initial load
543
+ loadFiles();
544
+ </script>
545
+ </body>
546
+ </html>
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Flask==3.0.0
2
+ flask-cors==4.0.0
3
+ werkzeug==3.0.1
4
+
5
+ #STT
6
+ git+https://github.com/jebin2/STT.git#egg=stt-runner[fasterwhisper]
worker.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import time
3
+ import os
4
+ import subprocess
5
+ import json
6
+ import shlex
7
+ from datetime import datetime
8
+
9
+ CWD = "./"
10
+ PYTHON_PATH = "stt-transcribe"
11
+ STT_MODEL_NAME = "fasterwhispher"
12
+ POLL_INTERVAL = 5 # seconds
13
+
14
def process_audio(file_id, filepath):
    """Transcribe one audio file with the external STT command.

    Runs ``PYTHON_PATH --input <absolute filepath> --model STT_MODEL_NAME``
    in ``CWD`` and then reads ``temp_dir/output_transcription.json``.

    Args:
        file_id: Queue id of the file; used only in the error log line.
        filepath: Path of the audio file; made absolute before it is passed
            to the command.

    Returns:
        ``(caption, None)`` on success, or ``(None, error_message)`` when the
        command fails, the output file is missing, or it is not valid JSON.
    """
    output_path = os.path.join(CWD, 'temp_dir', 'output_transcription.json')
    try:
        print(f"Processing file: {filepath}")

        # Remove any result left by an earlier run, so a run that writes no
        # output cannot hand back the previous file's transcription.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        # Argument list instead of a shell string: no quoting pitfalls, and
        # ``cwd`` already sets the working directory.
        command = [
            *shlex.split(PYTHON_PATH),
            '--input', os.path.abspath(filepath),
            '--model', STT_MODEL_NAME,
        ]
        subprocess.run(
            command,
            check=True,
            cwd=CWD,
            env={
                **os.environ,
                'PYTHONUNBUFFERED': '1',
                'CUDA_LAUNCH_BLOCKING': '1',
                'USE_CPU_IF_POSSIBLE': 'true',
            },
        )

        with open(output_path, 'r', encoding='utf-8') as file:
            result = json.loads(file.read().strip())

        # NOTE(review): the output schema is not visible here; 'text' and
        # 'transcription' are the keys the original code probed. Confirm
        # against the STT tool.
        if isinstance(result, dict):
            caption = result.get('text', '') or result.get('transcription', '') or str(result)
        else:
            # Any other JSON shape is stringified rather than raising.
            caption = str(result)

        return caption, None

    except Exception as e:
        # Worker boundary: report the failure to the caller instead of
        # crashing the polling loop.
        print(f"Error processing file {file_id}: {str(e)}")
        return None, str(e)
49
+
50
def update_status(file_id, status, caption=None, error=None):
    """Update the status of one ``audio_files`` row.

    Args:
        file_id: Primary key of the row to update.
        status: New status value. ``'completed'`` and ``'failed'`` also set
            ``caption`` and stamp ``processed_at`` with the current time;
            any other value changes only ``status``.
        caption: Transcription text stored when ``status`` is ``'completed'``.
        error: Error text stored as ``"Error: <error>"`` in the caption
            column when ``status`` is ``'failed'``.
    """
    conn = sqlite3.connect('audio_captions.db')
    try:
        if status in ('completed', 'failed'):
            stored_caption = caption if status == 'completed' else f"Error: {error}"
            conn.execute(
                '''UPDATE audio_files
                   SET status = ?, caption = ?, processed_at = ?
                   WHERE id = ?''',
                (status, stored_caption, datetime.now().isoformat(), file_id),
            )
        else:
            conn.execute('UPDATE audio_files SET status = ? WHERE id = ?', (status, file_id))
        conn.commit()
    finally:
        # Close the handle even if the UPDATE or commit raises.
        conn.close()
70
+
71
def _next_pending_file():
    """Return the oldest ``not_started`` row, or ``None`` if the queue is empty."""
    conn = sqlite3.connect('audio_captions.db')
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute('''SELECT * FROM audio_files
                               WHERE status = 'not_started'
                               ORDER BY created_at ASC
                               LIMIT 1''').fetchone()
    finally:
        # Close the handle even when the query fails.
        conn.close()


def worker_loop():
    """Poll the queue forever, transcribing one ``not_started`` file at a time.

    Each picked file is marked ``processing``, passed to ``process_audio`` and
    then marked ``completed`` (with its caption) or ``failed`` (with the error
    text). When the queue is empty, or any step raises, the loop sleeps for
    ``POLL_INTERVAL`` seconds and tries again. This function never returns.
    """
    print("STT Worker started. Polling for new audio files...")

    while True:
        try:
            row = _next_pending_file()

            if not row:
                # No files to process, sleep for a bit.
                time.sleep(POLL_INTERVAL)
                continue

            file_id = row['id']
            filepath = row['filepath']
            filename = row['filename']

            print(f"\n{'='*60}")
            print(f"Processing: {filename}")
            print(f"ID: {file_id}")
            print(f"{'='*60}")

            # Mark the row first so the UI shows it as in progress.
            update_status(file_id, 'processing')

            caption, error = process_audio(file_id, filepath)

            if caption:
                print(f"✓ Successfully processed: {filename}")
                print(f"Caption: {caption[:100]}...")
                update_status(file_id, 'completed', caption=caption)
            else:
                print(f"✗ Failed to process: {filename}")
                print(f"Error: {error}")
                update_status(file_id, 'failed', error=error)

        except Exception as e:
            # Keep the worker alive through transient failures.
            print(f"Worker error: {str(e)}")
            time.sleep(POLL_INTERVAL)
119
+
120
if __name__ == '__main__':
    # The worker may be launched before app.py has created the database.
    # Wait for the file to appear instead of exiting, otherwise nothing
    # would ever process the queue.
    announced = False
    while not os.path.exists('audio_captions.db'):
        if not announced:
            print("Database not found. Waiting for app.py to initialize it...")
            announced = True
        time.sleep(POLL_INTERVAL)

    worker_loop()