prthm11 committed on
Commit
4207399
·
verified ·
1 Parent(s): 105dda6

Upload 12 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: official CPython 3.12.2.
FROM python:3.12.2

WORKDIR /app

# Install dependencies BEFORE copying the application source, so the pip
# layer is reused from the build cache when only source files change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# NOTE(review): merged.py imports pyaudio; if requirements.txt builds it from
# source, the PortAudio development headers may be needed in this image.
COPY . .

# NOTE(review): merged.py's app.run(...) call is not visible here; confirm it
# actually listens on this port.
EXPOSE 7860

CMD ["python", "merged.py"]
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory, render_template
2
+ import threading
3
+ import time
4
+ import os
5
+ import queue
6
+ from pathlib import Path
7
+ import pyaudio
8
+ from werkzeug.utils import secure_filename
9
+ from rec_transcribe_extension import Transcriber, diarization_hook
10
+ from rec_transcribe_extension import (
11
+ list_input_devices,
12
+ run_recording,
13
+ OUTPUT_DIR,
14
+ CHUNKS_DIR,
15
+ FINAL_WAV,)
16
+
17
+ app = Flask(__name__)
18
+
19
+ recording_thread = None
20
+ recording_running = False
21
+
22
+ recording_status = {
23
+ "recording": False,
24
+ "live_segments": []
25
+ }
26
+
27
+ # ------ Device Listing API ------
28
@app.route("/api/devices", methods=["GET"])
def api_devices():
    """List the audio devices that can be used as recording inputs.

    Returns:
        JSON ``{"devices": [{"index": ..., "name": ...}, ...]}`` with every
        PyAudio device that reports at least one input channel.
    """
    pa = pyaudio.PyAudio()
    try:
        devices = []
        for i in range(pa.get_device_count()):
            dev = pa.get_device_info_by_index(i)
            if dev.get("maxInputChannels", 0) > 0:
                devices.append({"index": dev["index"], "name": dev["name"]})
    finally:
        # Release the PyAudio instance even when a device query raises.
        pa.terminate()
    return jsonify({"devices": devices})
38
+
39
+ # --- Start recording ---
40
@app.route("/api/start-recording", methods=["POST"])
def api_start_recording():
    """Validate the request and start recording/transcription in a thread.

    JSON body fields:
        mic: required input-device index (anything ``int()`` accepts).
        sys: optional second input-device index; ``None``, ``""`` and
            ``"null"`` mean "no system device".
        chunk_secs: chunk length passed to ``run_recording`` (default 5).
        model: model name passed to ``run_recording`` (default "medium").
        no_transcribe: truthy to pass ``no_transcribe=True`` (default False).

    Returns:
        ``{"ok": True}`` after the thread has been started; otherwise
        ``{"error": ...}`` with status 400 (bad parameter, unknown device,
        already recording) or 500 (PyAudio failed to initialise).
    """
    global recording_thread, stop_event

    # silent=True: a missing/non-JSON body yields None instead of an HTTP
    # error raised by Flask; it is then rejected by the 'mic' check below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        mic = int(data.get("mic"))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400

    sys_index = None
    raw_sys = data.get("sys")
    if raw_sys not in (None, "", "null"):
        try:
            sys_index = int(raw_sys)
        except (TypeError, ValueError, OverflowError):
            return jsonify({"error": "Invalid 'sys' parameter"}), 400

    # A malformed chunk_secs is a client error, not a server crash.
    try:
        chunk_secs = int(data.get("chunk_secs", 5))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400
    model = data.get("model", "medium")
    no_transcribe = bool(data.get("no_transcribe", False))

    if recording_status["recording"]:
        return jsonify({"error": "Already recording"}), 400

    # --- Validate that requested devices exist and have input channels ---
    try:
        pa = pyaudio.PyAudio()
    except Exception as e:
        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500

    def device_is_valid(device_index):
        """Return True if the index names a device with input channels."""
        try:
            dev = pa.get_device_info_by_index(device_index)
            return dev.get("maxInputChannels", 0) > 0
        except Exception:
            return False

    try:
        if not device_is_valid(mic):
            return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
        if sys_index is not None and not device_is_valid(sys_index):
            return jsonify({"error": f"System device index {sys_index} not found or has no input channels"}), 400
    finally:
        pa.terminate()

    # Reset state
    recording_status["recording"] = True
    recording_status["live_segments"] = []
    # Keep a local reference so this run's worker is not affected if the
    # module-level stop_event is replaced by a later start request.
    event = threading.Event()
    stop_event = event

    def run():
        """Thread body: record with a worker that also feeds live_segments."""
        import rec_transcribe_extension as rte
        orig_worker = rte.chunk_writer_and_transcribe_worker

        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
            """Save each queued chunk, transcribe it and publish the segments."""
            while True:
                try:
                    filename, frames = in_queue.get(timeout=1.0)
                except queue.Empty:
                    # Exit only once stop was requested AND the queue is drained.
                    if event.is_set() and in_queue.empty():
                        break
                    continue

                rte.save_wav_from_frames(
                    filename, frames, nchannels=rte.CHANNELS)
                final_frames_list.extend(frames)

                diar_segments = rte.diarization_hook(str(filename)) or []

                if not (transcriber and transcriber.model):
                    continue
                try:
                    segments, _info = transcriber.model.transcribe(
                        str(filename), beam_size=5)
                    for seg in segments:
                        seg_start = seg.start
                        seg_end = seg.end
                        seg_text = seg.text.strip()
                        # First diarization span overlapping the segment wins.
                        speaker = next(
                            (d_speaker for d_start, d_end, d_speaker in diar_segments
                             if seg_start < d_end and seg_end > d_start),
                            "Unknown",
                        )
                        # Update live_segments for frontend polling.
                        recording_status["live_segments"].append({
                            "start": float(seg_start),
                            "end": float(seg_end),
                            "speaker": str(speaker),
                            "text": seg_text
                        })
                        # Append per segment so the transcript file grows
                        # while the chunk is still being processed.
                        line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                        with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                            tf.write(line)
                except Exception as e:
                    print(f"Transcription error for {filename.name}: {e}")
            print("Chunk writer/transcriber worker exiting.")

        # Monkey-patch the module-level worker for the duration of this run.
        # NOTE(review): presumably run_recording looks the worker up on the
        # module at call time; confirm in rec_transcribe_extension.
        rte.chunk_writer_and_transcribe_worker = patched_worker
        try:
            rte.stop_event = event
            run_recording(mic_index=mic, sys_index=sys_index, chunk_secs=chunk_secs,
                          model_name=model, no_transcribe=no_transcribe)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
            recording_status["recording"] = False

    recording_thread = threading.Thread(target=run, daemon=True)
    recording_thread.start()
    return jsonify({"ok": True})
157
+
158
+ # --- Stop recording ---
159
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
    """Ask the running recording (if any) to stop.

    Returns:
        ``{"ok": True}`` whether or not a recording was active.
    """
    # stop_event becomes a module global only once api_start_recording has
    # run; look it up defensively so an early stop request does not raise
    # NameError.
    event = globals().get("stop_event")
    if event:
        event.set()
    return jsonify({"ok": True})
165
+
166
+ # --- Poll status ---
167
@app.route("/api/recording-status")
def api_recording_status():
    """Return the shared recording state (flag and live segments) as JSON."""
    current_state = recording_status
    return jsonify(current_state)
170
+
171
+ # # serve saved uploads at /uploads/<filename>
172
+ # @app.route('/uploads/<path:filename>')
173
+ # def serve_uploaded(filename):
174
+ # return send_from_directory(str(OUTPUT_DIR), filename)
175
+
176
+ # # --- upload pre-recorded files ---
177
+ # @app.route("/api/upload", methods=["POST"])
178
+ # def api_upload_file():
179
+ # """
180
+ # Accept a single file (form-data 'file'), save it into OUTPUT_DIR and return json
181
+ # { ok: True, filename: "<saved_name>", url: "/static/<saved_name>" }.
182
+ # """
183
+ # if 'file' not in request.files:
184
+ # return jsonify({"error": "No file provided"}), 400
185
+ # f = request.files['file']
186
+ # if f.filename == '':
187
+ # return jsonify({"error": "Empty filename"}), 400
188
+
189
+ # safe_name = secure_filename(f.filename)
190
+ # # prefix timestamp to avoid collisions
191
+ # ts = int(time.time() * 1000)
192
+ # saved_name = f"{ts}_{safe_name}"
193
+ # saved_path = OUTPUT_DIR / saved_name
194
+ # try:
195
+ # f.save(str(saved_path))
196
+ # except Exception as e:
197
+ # return jsonify({"error": f"Failed to save file: {e}"}), 500
198
+
199
+ # return jsonify({"ok": True, "filename": saved_name, "url": f"/static/{saved_name}"})
200
+
201
+ # # --- Start server-side paced transcription for a saved WAV/MP3 file ---
202
+ # @app.route("/api/start-transcribe-file", methods=["POST"])
203
+ # def api_start_transcribe_file():
204
+ # """
205
+ # POST JSON { filename: "<saved_name>" }
206
+ # Spawns a background thread that transcribes the file using the Transcriber,
207
+ # and appends transcribed segments (with start/end/speaker/text) into
208
+ # recording_status["live_segments"] while setting recording_status["recording"]=True.
209
+ # The worker will pace segments to approximate 'live' streaming using seg.start timestamps.
210
+ # """
211
+ # global recording_status
212
+ # data = request.json or {}
213
+ # filename = data.get("filename")
214
+ # print("DEBUG: /api/start-transcribe-file called with:", filename, flush=True)
215
+
216
+ # if not filename:
217
+ # return jsonify({"error": "Missing 'filename'"}), 400
218
+
219
+ # file_path = OUTPUT_DIR / filename
220
+ # if not file_path.exists():
221
+ # return jsonify({"error": "File not found on server"}), 404
222
+
223
+ # # prevent concurrent transcription runs
224
+ # if recording_status.get("recording"):
225
+ # return jsonify({"error": "Another transcription/recording is already running"}), 400
226
+
227
+ # def worker():
228
+ # try:
229
+ # recording_status["recording"] = True
230
+ # recording_status["live_segments"] = []
231
+
232
+ # transcriber = Transcriber()
233
+ # if not transcriber.model:
234
+ # # model not loaded/available
235
+ # recording_status["recording"] = False
236
+ # print("Transcription model not available; cannot transcribe file.")
237
+ # return
238
+
239
+ # # perform diarization if available
240
+ # diar_segments = diarization_hook(str(file_path)) or []
241
+
242
+ # # get segments from model
243
+ # try:
244
+ # segments, info = transcriber.model.transcribe(str(file_path), beam_size=5)
245
+ # except Exception as e:
246
+ # print("Error during transcription:", e)
247
+ # recording_status["recording"] = False
248
+ # return
249
+
250
+ # # Stream the segments into recording_status with timing
251
+ # start_clock = time.time()
252
+ # for seg in segments:
253
+ # # seg.start is seconds into the audio
254
+ # wait_for = seg.start - (time.time() - start_clock)
255
+ # if wait_for > 0:
256
+ # time.sleep(wait_for)
257
+
258
+ # # map speaker using diarization segments (best-effort overlap)
259
+ # speaker = "Unknown"
260
+ # for d_start, d_end, d_label in diar_segments:
261
+ # if (seg.start < d_end) and (seg.end > d_start):
262
+ # speaker = d_label
263
+ # break
264
+
265
+ # seg_obj = {
266
+ # "start": float(seg.start),
267
+ # "end": float(seg.end),
268
+ # "speaker": str(speaker),
269
+ # "text": seg.text.strip()
270
+ # }
271
+
272
+ # # append to shared status for frontend polling
273
+ # recording_status.setdefault("live_segments", []).append(seg_obj)
274
+
275
+ # # also append to transcript file for persistence (optional)
276
+ # with open(rec_transcribe_extension.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
277
+ # line = f"[{filename}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
278
+ # tf.write(line)
279
+
280
+ # # done streaming
281
+ # recording_status["recording"] = False
282
+ # except Exception as e:
283
+ # print("Error in transcription worker:", e)
284
+ # recording_status["recording"] = False
285
+
286
+ # t = threading.Thread(target=worker, daemon=True)
287
+ # t.start()
288
+ # return jsonify({"ok": True})
289
+
290
+ # --- List final files ---
291
@app.route("/api/final-files")
def api_final_files():
    """List the .wav and .txt files found directly in OUTPUT_DIR.

    Returns:
        JSON ``{"files": [{"name", "path", "url"}, ...]}`` where path and url
        both point at ``/static/<name>``.
    """
    wanted_suffixes = (".wav", ".txt")
    listing = [
        {"name": entry, "path": f"/static/{entry}", "url": f"/static/{entry}"}
        for entry in os.listdir(OUTPUT_DIR)
        if entry.endswith(wanted_suffixes)
    ]
    return jsonify({"files": listing})
300
+
301
+ # --- Serve static files (WAV, TXT) ---
302
@app.route('/static/<path:filename>')
def static_files(filename):
    """Send the requested file from OUTPUT_DIR."""
    # NOTE(review): the app is created with Flask(__name__), which by default
    # also registers its own /static/<path:filename> rule; confirm which
    # handler actually serves these URLs.
    served_from = OUTPUT_DIR
    return send_from_directory(served_from, filename)
305
+
306
+ # --- Serve the frontend ---
307
@app.route("/")
def index():
    """Render the front-end page from the index2.html template."""
    template_name = "index2.html"
    return render_template(template_name)
310
+
311
+ if __name__ == "__main__":
312
+ app.run(port=5000, debug=True)
app2.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app2.py
2
+ """
3
+ Flask app to:
4
+ 1) serve the provided upload template,
5
+ 2) accept .mp3/.wav uploads and show an audio player,
6
+ 3) start/stop recording from a system loopback device when the audio element plays/pauses,
7
+ 4) stream live transcription back to the browser via Server-Sent Events (SSE).
8
+
9
+ Notes:
10
+ - Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
11
+ - Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
12
+ - This app assumes it runs on the same machine that has access to the local audio devices.
13
+ """
14
+ import os
15
+ import time
16
+ import threading
17
+ import pathlib
18
+ from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
19
+ from werkzeug.utils import secure_filename
20
+
21
+ # import your recorder/transcriber helper (uploaded by you)
22
+ import rec_transcribe_extension as rte
23
+
24
+ UPLOAD_FOLDER = "uploads"
25
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
26
+
27
+ ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
28
+
29
+ app = Flask(__name__, static_folder=None)
30
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
31
+
32
+ # Globals for recording thread management
33
+ recording_thread = None
34
+ recording_lock = threading.Lock()
35
+
36
+
37
def allowed_file(filename):
    """Return True when the file's extension (case-insensitive) is in ALLOWED_EXT."""
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
40
+
41
+
42
def find_system_loopback_index():
    """Pick an input device index that is likely to carry system audio.

    The first device with input channels whose lower-cased name contains one
    of the keywords below is returned. If none matches, the default input
    device is used.

    Returns:
        The device index as ``int``, or ``None`` when PyAudio cannot be
        imported/initialised or no default input device is available.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # PyAudio missing or failed to initialise: nothing to probe.
        return None

    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "loopback", "monitor")
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be queried is simply skipped.
                continue

        # Fallback: the default input device, if there is one.
        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        # Single cleanup point: runs on every return path and on errors.
        pa.terminate()
83
+
84
+
85
@app.route("/", methods=["GET"])
def index():
    """Render the upload page from the index2_upload.html template."""
    template_name = "index2_upload.html"
    return render_template(template_name)
88
+
89
+
90
@app.route("/upload", methods=["POST"])
def upload():
    """Store an uploaded audio file and report where it can be fetched.

    Expects a multipart form field named ``file``. The stored name is the
    sanitised original name prefixed with a millisecond timestamp.

    Returns:
        JSON ``success=True`` with ``url`` and ``filename``, or
        ``success=False`` with an ``error`` message and status 400.
    """
    incoming = request.files.get('file')
    if incoming is None:
        return jsonify(success=False, error="No file part"), 400
    if incoming.filename == '':
        return jsonify(success=False, error="Empty filename"), 400

    safe_name = secure_filename(incoming.filename)
    if not allowed_file(safe_name):
        return jsonify(success=False, error="Extension not allowed"), 400

    # Millisecond timestamp prefix keeps repeated uploads from colliding.
    stamped_name = f"{int(time.time() * 1000)}_{safe_name}"
    incoming.save(os.path.join(app.config['UPLOAD_FOLDER'], stamped_name))
    return jsonify(success=True, url=f"/uploads/{stamped_name}", filename=stamped_name)
108
+
109
+
110
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline (not as a download)."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)
113
+
114
+
115
@app.route("/start", methods=["POST"])
def start_recording():
    """Start background recording/transcription from a loopback-style device.

    Optional JSON body: ``{"filename": <name returned by /upload>}``. When
    given, the file must exist in the upload folder (sanity check only; the
    server does not play the file).

    Returns:
        JSON ``success=True`` when recording was started or is already
        running; ``success=False`` with status 400 (unknown file) or 500
        (no usable input device).
    """
    global recording_thread
    # This module does not import queue at top level, but the worker below
    # needs queue.Empty.
    import queue

    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        body = {}
    filename = body.get('filename')
    if filename and not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
        return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # If there's an active recording, report success without restarting.
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # Choose device: prefer loopback, fall back to the default input.
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Fresh event for this run, published on the module so that the
        # /stop and /events handlers (which read rte.stop_event) see it.
        stop_event = threading.Event()
        rte.stop_event = stop_event

        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
            """Save each queued chunk, transcribe it and append transcript lines."""
            while True:
                try:
                    chunk_path, frames = in_queue.get(timeout=1.0)
                except queue.Empty:
                    # Exit only once stop was requested AND the queue is drained.
                    if stop_event.is_set() and in_queue.empty():
                        break
                    continue

                rte.save_wav_from_frames(
                    chunk_path, frames, nchannels=rte.CHANNELS)
                final_frames_list.extend(frames)

                diar_segments = rte.diarization_hook(str(chunk_path)) or []

                if not (transcriber and transcriber.model):
                    continue
                try:
                    segments, _info = transcriber.model.transcribe(
                        str(chunk_path), beam_size=5)
                    for seg in segments:
                        seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                        # First diarization span overlapping the segment wins.
                        speaker = next(
                            (d_speaker for d_start, d_end, d_speaker in diar_segments
                             if seg_start < d_end and seg_end > d_start),
                            "Unknown",
                        )
                        line = f"[{pathlib.Path(chunk_path).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                        with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                            tf.write(line)
                except Exception as e:
                    print(f"Transcription error for {chunk_path}: {e}")

            print("Patched worker exiting.")

        def target():
            """Thread body: run the recorder with the patched worker installed."""
            try:
                orig_worker = rte.chunk_writer_and_transcribe_worker
                rte.chunk_writer_and_transcribe_worker = patched_worker
                try:
                    rte.run_recording(mic_index=dev_index, sys_index=None,
                                      chunk_secs=getattr(
                                          rte, 'CHUNK_DURATION_SECS', 3),
                                      model_name=getattr(rte, 'MODEL_NAME', None),
                                      no_transcribe=False)
                finally:
                    rte.chunk_writer_and_transcribe_worker = orig_worker
            except Exception as e:
                print("run_recording exception:", e)

        # The thread must be created and started here; without it nothing
        # records and the view would return None.
        recording_thread = threading.Thread(target=target, daemon=True)
        recording_thread.start()

    return jsonify(success=True, message="Recording started")
201
+
202
+
203
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Set rec_transcribe_extension's stop_event (when present) and acknowledge.

    Always answers with ``success=True``; errors while setting the event are
    ignored.
    """
    with recording_lock:
        event = getattr(rte, 'stop_event', None)
        if event is not None:
            try:
                event.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")
217
+
218
+
219
def tail_transcript_file(path, stop_cond_fn=None):
    """Tail a transcript file and yield its lines as Server-Sent Events.

    Args:
        path: transcript file to follow; it may not exist yet.
        stop_cond_fn: optional zero-argument callable, polled once per loop
            iteration; a truthy result ends the stream.

    Yields:
        ``"data: <line>\\n\\n"`` for every non-blank line, a one-off info
        event while the file does not exist yet, and a final
        "Transcription ended" info event.
    """
    last_pos = 0
    sent_initial = False

    def read_new_events():
        """Return SSE strings for text appended since last_pos (None if no file)."""
        nonlocal last_pos
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                fh.seek(last_pos)
                lines = fh.readlines()
                if lines:
                    last_pos = fh.tell()
        except FileNotFoundError:
            return None
        stripped = (ln.strip() for ln in lines)
        return [f"data: {text}\n\n" for text in stripped if text]

    while not (stop_cond_fn and stop_cond_fn()):
        events = read_new_events()
        if events is None:
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
        elif events:
            yield from events
            sent_initial = True
        else:
            # no new lines
            time.sleep(0.25)

    # Flush whatever was appended between the last poll and the stop signal,
    # so the tail of the transcript is not lost.
    yield from read_new_events() or ()
    # final notification
    yield "data: [info] Transcription ended.\n\n"
251
+
252
+
253
@app.route("/events")
def events():
    """Stream new transcript lines to the client as Server-Sent Events.

    The stream follows rec_transcribe_extension.TRANSCRIPT_FILE and ends once
    the module's stop_event is set and no recording thread is alive. Responds
    with status 500 when no transcript file is configured.
    """
    configured_path = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured_path:
        return Response("No transcript file configured", status=500)

    def stop_fn():
        """True when stop was requested and the recording thread is not running."""
        try:
            event = getattr(rte, 'stop_event', None)
            stop_requested = event is not None and event.is_set()
        except Exception:
            stop_requested = False
        if not stop_requested:
            return False
        worker = recording_thread
        return worker is None or not worker.is_alive()

    line_stream = tail_transcript_file(str(configured_path), stop_cond_fn=stop_fn)
    return Response(stream_with_context(line_stream),
                    mimetype="text/event-stream")
279
+
280
+
281
@app.route("/status")
def status():
    """Report as JSON whether the background recording thread is alive."""
    is_running = bool(recording_thread and recording_thread.is_alive())
    return jsonify(running=is_running)
287
+
288
+
289
+ if __name__ == "__main__":
290
+ # listen on all interfaces (0.0.0.0), port 7860: reachable from other machines/containers, not only localhost
291
+ app.run(host="0.0.0.0", port=7860, threaded=True)
merged.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # merged.py
2
+ import os
3
+ import time
4
+ import threading
5
+ import queue
6
+ import pathlib
7
+ import pyaudio
8
+ from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
9
+ from werkzeug.utils import secure_filename
10
+
11
+ # your helper module
12
+ import rec_transcribe_extension as rte
13
+ from rec_transcribe_extension import Transcriber, diarization_hook, run_recording, OUTPUT_DIR
14
+
15
+ app = Flask(__name__)
16
+ UPLOAD_FOLDER = "uploads"
17
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
18
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
19
+
20
+ ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
21
+
22
+
23
def allowed_file(filename: str) -> bool:
    """Return True when the file's extension (case-insensitive) is in ALLOWED_EXT."""
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
27
+
28
+
29
+ # ---------------- Shared state ----------------
30
+ recording_thread = None
31
+ recording_running = False
32
+ recording_lock = threading.Lock()
33
+
34
+ recording_status = {
35
+ "recording": False,
36
+ "live_segments": []
37
+ }
38
+
39
+ # ---------------- Landing + Frontend ----------------
40
+
41
+
42
@app.route("/")
def landing():
    """Render the landing page from the landing.html template."""
    template_name = "landing.html"
    return render_template(template_name)
45
+
46
+
47
@app.route("/live")
def live_page():
    """Render the live-recording page from the index2.html template."""
    template_name = "index2.html"
    return render_template(template_name)
50
+
51
+
52
@app.route("/upload")
def upload_page():
    """Render the upload page from the index2_upload.html template."""
    template_name = "index2_upload.html"
    return render_template(template_name)
55
+
56
+ # ---------------- Device listing ----------------
57
+
58
+
59
@app.route("/api/devices", methods=["GET"])
def api_devices():
    """List the audio devices that can be used as recording inputs.

    Returns:
        JSON ``{"devices": [{"index": ..., "name": ...}, ...]}`` with every
        PyAudio device that reports at least one input channel.
    """
    pa = pyaudio.PyAudio()
    try:
        devices = [
            {"index": dev["index"], "name": dev["name"]}
            for dev in (pa.get_device_info_by_index(i)
                        for i in range(pa.get_device_count()))
            if dev.get("maxInputChannels", 0) > 0
        ]
    finally:
        # Release the PyAudio instance even when a device query raises.
        pa.terminate()
    return jsonify({"devices": devices})
69
+
70
+ # --- Start recording ---
71
@app.route("/api/start-recording", methods=["POST"])
def api_start_recording():
    """Validate the request and start recording/transcription in a thread.

    JSON body fields:
        mic: required input-device index (anything ``int()`` accepts).
        sys: optional second input-device index; ``None``, ``""`` and
            ``"null"`` mean "no system device".
        chunk_secs: chunk length passed to ``run_recording`` (default 5).
        model: model name passed to ``run_recording`` (default "medium").
        no_transcribe: truthy to pass ``no_transcribe=True`` (default False).

    Returns:
        ``{"ok": True}`` after the thread has been started; otherwise
        ``{"error": ...}`` with status 400 (bad parameter, unknown device,
        already recording) or 500 (PyAudio failed to initialise).
    """
    global recording_thread, stop_event

    # silent=True: a missing/non-JSON body yields None instead of an HTTP
    # error raised by Flask; it is then rejected by the 'mic' check below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        mic = int(data.get("mic"))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400

    sys_index = None
    raw_sys = data.get("sys")
    if raw_sys not in (None, "", "null"):
        try:
            sys_index = int(raw_sys)
        except (TypeError, ValueError, OverflowError):
            return jsonify({"error": "Invalid 'sys' parameter"}), 400

    # A malformed chunk_secs is a client error, not a server crash.
    try:
        chunk_secs = int(data.get("chunk_secs", 5))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400
    model = data.get("model", "medium")
    no_transcribe = bool(data.get("no_transcribe", False))

    # Cheap early rejection; re-checked under the lock before starting.
    if recording_status["recording"]:
        return jsonify({"error": "Already recording"}), 400

    # --- Validate that requested devices exist and have input channels ---
    try:
        pa = pyaudio.PyAudio()
    except Exception as e:
        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500

    def device_is_valid(device_index):
        """Return True if the index names a device with input channels."""
        try:
            dev = pa.get_device_info_by_index(device_index)
            return dev.get("maxInputChannels", 0) > 0
        except Exception:
            return False

    try:
        if not device_is_valid(mic):
            return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
        if sys_index is not None and not device_is_valid(sys_index):
            return jsonify({"error": f"System device index {sys_index} not found or has no input channels"}), 400
    finally:
        pa.terminate()

    # Keep a local reference so this run's worker is not affected if the
    # module-level stop_event is replaced by a later start request.
    event = threading.Event()

    def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
        """Save each queued chunk, transcribe it and publish the segments."""
        while True:
            try:
                filename, frames = in_queue.get(timeout=1.0)
            except queue.Empty:
                # Exit only once stop was requested AND the queue is drained.
                if event.is_set() and in_queue.empty():
                    break
                continue

            rte.save_wav_from_frames(
                filename, frames, nchannels=rte.CHANNELS)
            final_frames_list.extend(frames)

            diar_segments = rte.diarization_hook(str(filename)) or []

            if not (transcriber and transcriber.model):
                continue
            try:
                segments, _info = transcriber.model.transcribe(
                    str(filename), beam_size=5)
                for seg in segments:
                    seg_start = seg.start
                    seg_end = seg.end
                    seg_text = seg.text.strip()
                    # First diarization span overlapping the segment wins.
                    speaker = next(
                        (d_speaker for d_start, d_end, d_speaker in diar_segments
                         if seg_start < d_end and seg_end > d_start),
                        "Unknown",
                    )
                    # Update live_segments for frontend polling.
                    recording_status["live_segments"].append({
                        "start": float(seg_start),
                        "end": float(seg_end),
                        "speaker": str(speaker),
                        "text": seg_text
                    })
                    # Append per segment so the transcript file grows while
                    # the chunk is still being processed.
                    line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                        tf.write(line)
            except Exception as e:
                print(f"Transcription error for {filename.name}: {e}")
        print("Chunk writer/transcriber worker exiting.")

    def run():
        """Thread body: run the recorder with the patched worker installed."""
        orig_worker = rte.chunk_writer_and_transcribe_worker
        # NOTE(review): presumably run_recording looks the worker up on the
        # module at call time; confirm in rec_transcribe_extension.
        rte.chunk_writer_and_transcribe_worker = patched_worker
        try:
            run_recording(mic_index=mic, sys_index=sys_index, chunk_secs=chunk_secs,
                          model_name=model, no_transcribe=no_transcribe)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
            recording_status["recording"] = False

    with recording_lock:
        # Check-and-set under the lock so two concurrent requests cannot
        # both start a recording.
        if recording_status["recording"]:
            return jsonify({"error": "Already recording"}), 400
        recording_status["recording"] = True
        recording_status["live_segments"] = []
        stop_event = event
        # Publish the event before the thread starts: /api/stop-recording
        # reads rte.stop_event and must never see a stale one.
        rte.stop_event = event
        recording_thread = threading.Thread(target=run, daemon=True)
        recording_thread.start()
    return jsonify({"ok": True})
188
+
189
+ # # ---------------- Recording APIs ----------------
190
+ # @app.route("/api/start-recording", methods=["POST"])
191
+ # def api_start_recording():
192
+ # global recording_thread, recording_status
193
+ # data = request.json or {}
194
+
195
+ # mic = int(data.get("mic", -1))
196
+ # sys = data.get("sys")
197
+ # if sys in (None, "", "null"):
198
+ # sys = None
199
+ # else:
200
+ # sys = int(sys)
201
+
202
+ # chunk_secs = int(data.get("chunk_secs", 5))
203
+ # model = data.get("model", "medium")
204
+ # no_transcribe = bool(data.get("no_transcribe", False))
205
+
206
+ # if recording_status["recording"]:
207
+ # return jsonify({"error": "Already recording"}), 400
208
+
209
+ # # validate devices
210
+ # pa = pyaudio.PyAudio()
211
+ # def valid(dev_idx):
212
+ # try:
213
+ # dev = pa.get_device_info_by_index(dev_idx)
214
+ # return dev.get("maxInputChannels", 0) > 0
215
+ # except Exception:
216
+ # return False
217
+ # if not valid(mic):
218
+ # pa.terminate()
219
+ # return jsonify({"error": f"Mic device {mic} invalid"}), 400
220
+ # if sys is not None and not valid(sys):
221
+ # pa.terminate()
222
+ # return jsonify({"error": f"System device {sys} invalid"}), 400
223
+ # pa.terminate()
224
+
225
+ # # reset state
226
+ # recording_status["recording"] = True
227
+ # recording_status["live_segments"] = []
228
+ # rte.stop_event = threading.Event()
229
+
230
+ # def run():
231
+ # try:
232
+ # run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
233
+ # model_name=model, no_transcribe=no_transcribe)
234
+ # finally:
235
+ # recording_status["recording"] = False
236
+
237
+ # recording_thread = threading.Thread(target=run, daemon=True)
238
+ # recording_thread.start()
239
+ # return jsonify({"ok": True})
240
+
241
+
242
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
    """Set the recorder module's stop event (when one exists) and acknowledge."""
    stop_signal = getattr(rte, "stop_event", None)
    if stop_signal:
        stop_signal.set()
    return jsonify({"ok": True})
247
+
248
+
249
@app.route("/api/recording-status")
def api_recording_status():
    """Return the `recording` flag and `live_segments` list from recording_status."""
    is_recording = recording_status.get("recording", False)
    segments = recording_status.get("live_segments", [])
    return jsonify({"recording": is_recording, "live_segments": segments})
255
+
256
+ # ---------------- Upload-based APIs ----------------
257
+
258
+
259
@app.route("/api/upload", methods=["POST"])
def api_upload_file():
    """
    Store an uploaded file in the configured upload folder.

    Responds with HTTP 400 when the multipart field 'file' is missing, the
    filename is empty, or allowed_file() rejects the sanitized name. On
    success returns the stored filename and the URL it is served from.
    """
    upload = request.files.get('file')
    if upload is None:
        return jsonify(success=False, error="No file part"), 400
    if upload.filename == '':
        return jsonify(success=False, error="Empty filename"), 400

    safe_name = secure_filename(upload.filename)
    if not allowed_file(safe_name):
        return jsonify(success=False, error="Extension not allowed"), 400

    # A millisecond timestamp prefix keeps repeated uploads from colliding.
    stamp = int(time.time() * 1000)
    stored_name = f"{stamp}_{safe_name}"
    upload.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))
    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)
277
+
278
+ # ---------------- File serving ----------------
279
+
280
+
281
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a file from the configured upload folder inline (not as an attachment)."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)
284
+
285
+ # @app.route("/api/start-transcribe-file", methods=["POST"])
286
+ # def api_start_transcribe_file():
287
+ # data = request.json or {}
288
+ # filename = data.get("filename")
289
+ # file_path = OUTPUT_DIR / filename
290
+ # if not file_path.exists():
291
+ # return jsonify({"error": "File not found"}), 404
292
+
293
+ # if recording_status.get("recording"):
294
+ # return jsonify({"error": "Busy"}), 400
295
+
296
+ # def worker():
297
+ # try:
298
+ # recording_status["recording"] = True
299
+ # recording_status["live_segments"] = []
300
+ # transcriber = Transcriber()
301
+ # diar_segments = diarization_hook(str(file_path)) or []
302
+ # segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
303
+ # start_clock = time.time()
304
+ # for seg in segments:
305
+ # wait_for = seg.start - (time.time() - start_clock)
306
+ # if wait_for > 0:
307
+ # time.sleep(wait_for)
308
+ # speaker = "Unknown"
309
+ # for d_start, d_end, d_label in diar_segments:
310
+ # if (seg.start < d_end) and (seg.end > d_start):
311
+ # speaker = d_label
312
+ # break
313
+
314
+ # seg_obj = {
315
+ # "start": float(seg.start),
316
+ # "end": float(seg.end),
317
+ # "speaker": speaker,
318
+ # "text": seg.text.strip()
319
+ # }
320
+ # recording_status["live_segments"].append(seg_obj)
321
+
322
+ # # --- NEW: also append to transcript file so /events SSE can stream it ---
323
+ # line = f"{seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
324
+ # with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
325
+ # tf.write(line)
326
+
327
+ # recording_status["recording"] = False
328
+ # except Exception as e:
329
+ # print("Error in file transcription:", e)
330
+ # recording_status["recording"] = False
331
+
332
+ # threading.Thread(target=worker, daemon=True).start()
333
+ # return jsonify({"ok": True})
334
+
335
def find_system_loopback_index():
    """
    Try to find a likely loopback / system audio input device.

    Heuristic: return the index of the first input-capable device whose
    lower-cased name contains one of the keywords below. If none matches,
    fall back to the default input device.

    Returns:
        The device index as an int, or None when PyAudio cannot be imported
        or initialised, device enumeration fails, or no default input
        device is available.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        return None

    # "loopback" is already covered by the "loop" substring.
    keywords = ("loop", "stereo", "mix", "what u hear", "virtual", "audio cable", "monitor")

    # A single finally guarantees the PyAudio instance is released on every
    # exit path, including an unexpected failure while enumerating devices.
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be queried is simply skipped.
                continue

        return int(pa.get_default_input_device_info().get("index"))
    except Exception:
        return None
    finally:
        pa.terminate()
374
+
375
@app.route("/api/start-transcribe-file", methods=["POST"])
def api_start_transcribe_file():
    """
    Start a background thread which calls rec_transcribe_extension.run_recording(...).

    A loopback device is preferred; if none is found the default input device
    is used. While the recording runs, the module's chunk worker is replaced
    by a variant that appends diarized transcript lines to rte.TRANSCRIPT_FILE
    without console output; the original worker is restored afterwards.

    Responses:
        400 if a 'filename' was given but is not present in the upload folder.
        500 if no usable audio input device is found.
        200 with success=True otherwise (also when a recording already runs).
    """
    global recording_thread
    body = request.get_json(force=True, silent=True) or {}
    filename = body.get('filename')

    # Sanity check only: the file is not played on the server, this just
    # catches a start request issued before any upload.
    if filename and not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
        return jsonify(success=False, error="Uploaded file not found on server"), 400

    def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
        while True:
            try:
                chunk_path, frames = in_queue.get(timeout=1.0)
            except queue.Empty:
                if rte.stop_event.is_set() and in_queue.empty():
                    break
                continue

            rte.save_wav_from_frames(chunk_path, frames, nchannels=rte.CHANNELS)
            final_frames_list.extend(frames)

            # A diarization failure must not kill the worker thread; fall
            # back to "Unknown" speakers for this chunk instead.
            try:
                diar_segments = rte.diarization_hook(str(chunk_path)) or []
            except Exception as e:
                print(f"Diarization error for {chunk_path}: {e}")
                diar_segments = []

            if transcriber and transcriber.model:
                try:
                    segments, _info = transcriber.model.transcribe(str(chunk_path), beam_size=5)
                    chunk_name = os.path.basename(str(chunk_path))
                    for seg in segments:
                        seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                        speaker = "Unknown"
                        for d_start, d_end, d_speaker in diar_segments:
                            # First diarization turn overlapping the segment wins.
                            if (seg_start < d_end) and (seg_end > d_start):
                                speaker = d_speaker
                                break
                        line = f"[{chunk_name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                        with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                            tf.write(line)
                except Exception as e:
                    print(f"Transcription error for {chunk_path}: {e}")

        print("Patched worker exiting.")

    def target(device_index):
        orig_worker = rte.chunk_writer_and_transcribe_worker
        rte.chunk_writer_and_transcribe_worker = patched_worker
        try:
            rte.run_recording(
                mic_index=device_index,
                sys_index=None,
                chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
                model_name=getattr(rte, 'MODEL_NAME', None),
                no_transcribe=False
            )
        except Exception as e:
            print("run_recording exception:", e)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker

    # The lock is held from the "already running" check until the thread has
    # been started, so two concurrent requests cannot both start a recording.
    with recording_lock:
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # choose device: prefer loopback
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Always install a fresh, unset event before starting: it replaces an
        # event left set by a previous run and lets a stop request issued
        # while the model is still loading take effect.
        rte.stop_event = threading.Event()

        recording_thread = threading.Thread(target=target, args=(dev_index,), daemon=True)
        recording_thread.start()

    return jsonify(success=True, message="Recording started", device_index=dev_index)
467
+
468
+ # @app.route("/static/<path:filename>")
469
+ # def static_files(filename):
470
+ # return send_from_directory(OUTPUT_DIR, filename)
471
+
472
@app.route("/stop", methods=["POST"])
def stop_recording():
    """
    Ask the recorder to stop by setting rec_transcribe_extension's stop_event.

    The event is set under recording_lock; a missing or None event is ignored
    and any error raised while setting it is swallowed.
    """
    global recording_thread
    with recording_lock:
        stop_signal = getattr(rte, 'stop_event', None)
        if stop_signal is not None:
            try:
                stop_signal.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")
486
+
487
+
488
def tail_transcript_file(path, stop_cond_fn=None):
    """
    Generator that tails the transcript file and yields SSE data lines.

    Each non-blank line of the file is yielded once as "data: <line>\n\n".
    If the file doesn't exist yet, a single status message is yielded and the
    generator keeps waiting. After the stop condition fires, the file is read
    one last time so lines written during the final poll interval are not
    lost, then a closing "[info] Transcription ended." event is yielded.

    Args:
        path: Path of the transcript file to follow.
        stop_cond_fn: Optional callable; tailing ends once it returns True.
            With None the generator never ends on its own.
    """
    last_pos = 0
    sent_initial = False

    def read_new_lines():
        """Return lines appended since the last read, or None if the file is missing."""
        nonlocal last_pos
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                fh.seek(last_pos)
                new_lines = fh.readlines()
                if new_lines:
                    last_pos = fh.tell()
                return new_lines
        except FileNotFoundError:
            return None

    while True:
        if stop_cond_fn and stop_cond_fn():
            break
        lines = read_new_lines()
        if lines is None:
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
        elif lines:
            for ln in lines:
                ln = ln.strip()
                if ln:
                    yield f"data: {ln}\n\n"
            sent_initial = True
        else:
            # no new lines
            time.sleep(0.25)

    # Final drain: the stop check runs before the read above, so anything
    # appended since the previous read would otherwise never be sent.
    for ln in read_new_lines() or []:
        ln = ln.strip()
        if ln:
            yield f"data: {ln}\n\n"

    # final notification
    yield "data: [info] Transcription ended.\n\n"
520
+ # ---------------- SSE events (from app2) ----------------
521
+
522
+
523
@app.route("/events")
def events():
    """
    SSE endpoint streaming new transcript lines from rec_transcribe_extension.TRANSCRIPT_FILE.

    Responds with HTTP 500 when the module has no transcript file configured.
    The stream ends once the module stop_event is set and the background
    recording thread is no longer alive.
    """
    configured = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured:
        return Response("No transcript file configured", status=500)
    transcript_path = str(configured)

    def stop_fn():
        # Stop was requested only if the event exists, is not None and is set;
        # any error while checking counts as "not requested".
        try:
            stop_signal = getattr(rte, 'stop_event', None)
            stop_requested = stop_signal is not None and stop_signal.is_set()
        except Exception:
            stop_requested = False
        worker_alive = recording_thread is not None and recording_thread.is_alive()
        # End the stream only when stop was requested and the thread has finished.
        return stop_requested and not worker_alive

    line_stream = tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)
    return Response(stream_with_context(line_stream), mimetype="text/event-stream")
549
+
550
@app.route("/status")
def status():
    """Report whether the background recording thread exists and is alive."""
    running = bool(recording_thread and recording_thread.is_alive())
    return jsonify(running=running)
556
+
557
+ # ---------------- Run ----------------
558
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger
    # (which allows arbitrary code execution) must not be on by default.
    # Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
rec_transcribe_extension.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import wave
4
+ import queue
5
+ import threading
6
+ import datetime
7
+ from pathlib import Path
8
+ import wave
9
+ import pyaudio
10
+ from pyannote.audio import Pipeline
11
+
12
+ try:
13
+ diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
14
+ DIARIZATION_AVAILABLE = True
15
+ except Exception:
16
+ diarization_pipeline = None
17
+ DIARIZATION_AVAILABLE = False
18
+
19
+ # Optional modules (import safely)
20
+ try:
21
+ from faster_whisper import WhisperModel
22
+ FASTER_WHISPER_AVAILABLE = True
23
+ except Exception:
24
+ FASTER_WHISPER_AVAILABLE = False
25
+
26
+ import numpy as np
27
+
28
+ # # Optional: voice activity detection
29
+ # try:
30
+ # import webrtcvad
31
+ # VAD_AVAILABLE = True
32
+ # except Exception:
33
+ # VAD_AVAILABLE = False
34
+
35
+ # ========== CONFIG ==========
36
+ RUN_TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
37
+ FORMAT = pyaudio.paInt16
38
+ CHANNELS = 1 # mono
39
+ # RATE = 16000
40
+ RATE = 44100
41
+ CHUNK = 1024 # frames per buffer read
42
+ CHUNK_DURATION_SECS = 5 # how long each saved chunk is (seconds)
43
+ OUTPUT_DIR = Path("output_transcript_diarization")
44
+ CHUNKS_DIR = OUTPUT_DIR / f"chunks_{RUN_TIMESTAMP}"
45
+ FINAL_WAV = OUTPUT_DIR / f"recorded_audio_{RUN_TIMESTAMP}.wav"
46
+ TRANSCRIPT_FILE = OUTPUT_DIR / f"transcript_{RUN_TIMESTAMP}.txt"
47
+ MODEL_NAME = "medium" # if using faster-whisper; change as desired
48
+ # ============================
49
+
50
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
51
+ CHUNKS_DIR.mkdir(parents=True, exist_ok=True)
52
+
53
+ audio = pyaudio.PyAudio()
54
+
55
def list_input_devices():
    '''
    Prints every audio device that has at least one input channel
    (microphones, loopbacks, etc.) with its index, name and channel count.
    '''
    pa = pyaudio.PyAudio()
    # try/finally so the temporary PyAudio instance is released even if a
    # device query raises part-way through the listing.
    try:
        print("Available audio devices (inputs):")
        for i in range(pa.get_device_count()):
            dev = pa.get_device_info_by_index(i)
            if dev.get("maxInputChannels", 0) > 0:
                print(f"  {i}: {dev['name']} - {dev['maxInputChannels']} chans")
    finally:
        pa.terminate()
67
+
68
def open_stream_for_device(device_index, channels=1):
    '''
    Opens and returns an input stream on the module-level PyAudio instance
    for the given device index and channel count, using the module's
    FORMAT, RATE and CHUNK settings.
    '''
    stream_options = {
        "format": FORMAT,
        "channels": channels,
        "rate": RATE,
        "input": True,
        "frames_per_buffer": CHUNK,
        "input_device_index": device_index,
    }
    return audio.open(**stream_options)
79
+
80
def save_wav_from_frames(path: Path, frames: list, nchannels=1):
    '''
    Saves a list of raw 16-bit PCM frame buffers as a WAV file at `path`.

    The samples are peak-normalized to 90% of the int16 range before being
    written at the module sample rate (RATE). Empty or all-zero input is
    written unchanged.
    '''
    raw = b''.join(frames)
    audio_array = np.frombuffer(raw, dtype=np.int16)

    if audio_array.size:
        # Widen before abs(): abs(-32768) does not fit in int16 and would
        # wrap back to -32768, producing a wrong (too small) peak.
        peak = float(np.abs(audio_array.astype(np.int32)).max())
        if peak > 0:
            # Normalize: scale to 90% of int16 range
            audio_array = (audio_array.astype(np.float64) / peak * 32767 * 0.9).astype(np.int16)

    with wave.open(str(path), 'wb') as wf:
        wf.setnchannels(nchannels)
        # The samples written below are int16, so the width is taken from
        # the array itself rather than from a live PyAudio instance.
        wf.setsampwidth(audio_array.dtype.itemsize)
        wf.setframerate(RATE)
        wf.writeframes(audio_array.tobytes())
98
+
99
def merge_mono_files_to_stereo(mic_path: Path, sys_path: Path, out_path: Path):
    """
    Write a stereo WAV with the mic file on the left channel and the system
    file on the right channel.

    Deliberately simple: both inputs must be at the module RATE (asserted),
    samples are read as int16, and the output is cut to the shorter input.
    """
    with wave.open(str(mic_path), 'rb') as mic_wav, wave.open(str(sys_path), 'rb') as sys_wav:
        assert mic_wav.getframerate() == sys_wav.getframerate() == RATE
        sample_width = mic_wav.getsampwidth()
        frame_count = min(mic_wav.getnframes(), sys_wav.getnframes())
        left = np.frombuffer(mic_wav.readframes(frame_count), dtype=np.int16)
        right = np.frombuffer(sys_wav.readframes(frame_count), dtype=np.int16)

    # One row per frame, column 0 = left, column 1 = right; the row-major
    # byte layout is exactly the interleaved L/R stream a stereo WAV expects.
    interleaved = np.empty((frame_count, 2), dtype=np.int16)
    interleaved[:, 0] = left[:frame_count]
    interleaved[:, 1] = right[:frame_count]

    with wave.open(str(out_path), 'wb') as out_wav:
        out_wav.setnchannels(2)
        out_wav.setsampwidth(sample_width)
        out_wav.setframerate(RATE)
        out_wav.writeframes(interleaved.tobytes())
125
+
126
class Transcriber:
    def __init__(self):
        '''
        Tries to load the faster-whisper model named by MODEL_NAME.
        self.model stays None when faster-whisper is unavailable or loading fails.
        '''
        self.model = None
        if not FASTER_WHISPER_AVAILABLE:
            print("faster-whisper not available. Transcription will be disabled.")
            return

        print("Loading faster-whisper model. This may take some time...")
        # Use CUDA when torch reports it; any problem with torch means CPU.
        device = "cpu"
        try:
            import torch
            if torch.cuda.is_available():
                device = "cuda"
        except Exception:
            device = "cpu"

        # float16 on GPU, float32 on CPU
        compute_type = {"cuda": "float16"}.get(device, "float32")

        try:
            # instantiating may download weights on first run
            self.model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
        except Exception as e:
            print("Failed to load faster-whisper model:", e)
            print("Continuing without transcription.")
            self.model = None
        else:
            print(f"Model loaded on {device} (compute_type={compute_type}).")

    def transcribe_file(self, wav_path: str):
        '''
        Transcribes a WAV file and returns the segment texts joined by spaces.
        Returns None when no model is loaded or transcription fails.
        '''
        if not self.model:
            return None
        try:
            segments, _info = self.model.transcribe(wav_path, beam_size=5)
            return " ".join(seg.text.strip() for seg in segments)
        except Exception as e:
            print(f"Transcription error for {wav_path}: {e}")
            return None
168
+
169
def diarization_hook(audio_path: str):
    """
    Run the diarization pipeline on `audio_path`.

    Returns a list of (start, end, speaker) tuples, one per track yielded by
    the pipeline result, or None when diarization is not available.
    """
    if not DIARIZATION_AVAILABLE:
        return None
    annotation = diarization_pipeline(audio_path)
    return [
        (turn.start, turn.end, speaker)
        for turn, _, speaker in annotation.itertracks(yield_label=True)
    ]
180
+
181
+ # Recorder threads
182
def record_loop(device_index, out_queue, label="mic"):
    """
    Read audio from a device and push fixed-length chunks onto `out_queue`.

    Buffers of CHUNK frames are read until CHUNK_DURATION_SECS worth have
    accumulated; the batch is then queued as (chunk_path, frames), where
    chunk_path is a timestamped file name under CHUNKS_DIR.

    The loop ends when the module-level stop_event is set, after more than
    10 consecutive read errors, or on KeyboardInterrupt. A partially filled
    chunk is discarded on exit. The stream is always stopped and closed.
    """
    try:
        stream = open_stream_for_device(device_index, channels=CHANNELS)
    except Exception as e:
        print(f"Could not open stream for device {device_index} ({label}): {e}")
        return

    def stop_requested():
        # stop_event is created by run_recording or its caller; it may not
        # exist at all if this function is used on its own.
        event = globals().get("stop_event")
        return event is not None and event.is_set()

    frames_per_chunk = int(RATE / CHUNK * CHUNK_DURATION_SECS)
    frames = []
    print(f"Recording from device {device_index} ({label}) ... Press Ctrl+C to stop.")
    error_count = 0
    try:
        # Checking the stop event lets the thread end cleanly and release
        # the device instead of running until the stream is closed under it.
        while not stop_requested():
            try:
                data = stream.read(CHUNK, exception_on_overflow=False)
                error_count = 0  # reset on success
            except Exception as e:
                print(f"Read error on device {device_index} ({label}): {e}")
                error_count += 1
                if error_count > 10:
                    print(f"Too many errors on device {device_index} ({label}). Stopping this thread.")
                    break
                continue
            frames.append(data)
            if len(frames) >= frames_per_chunk:
                # Timezone-aware replacement for the deprecated utcnow();
                # the formatted timestamp is identical.
                ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
                filename = CHUNKS_DIR / f"{label}_{ts}.wav"
                out_queue.put((filename, frames))
                frames = []
    except KeyboardInterrupt:
        print(f"Recording thread {label} received KeyboardInterrupt.")
    finally:
        try:
            stream.stop_stream()
            stream.close()
        except Exception:
            pass
        print(f"Recording thread for {label} exited.")
222
+
223
def chunk_writer_and_transcribe_worker(in_queue: queue.Queue, final_frames_list: list, transcriber: Transcriber, single_channel_label="mic"):
    """
    Consume (chunk_path, frames) items from `in_queue` until stopped.

    For every item the chunk is saved as a WAV file, its frames are appended
    to `final_frames_list` (used later for the concatenated recording), and,
    when a transcriber with a loaded model is given, the chunk is transcribed.
    Each transcribed segment is labelled with the first overlapping
    diarization speaker ("Unknown" if none) and appended to TRANSCRIPT_FILE.

    The worker exits once stop_event is set and the queue is empty.
    `single_channel_label` is accepted for interface compatibility and is
    not used.
    """
    while True:
        try:
            filename, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            if stop_event.is_set() and in_queue.empty():
                break
            continue

        save_wav_from_frames(filename, frames, nchannels=CHANNELS)
        print(f"Saved chunk: {filename.name}")
        final_frames_list.extend(frames)

        # A diarization failure on one chunk must not kill this thread:
        # that would silently stop chunk saving and transcription for the
        # rest of the session. Fall back to unlabelled speakers instead.
        try:
            diar_segments = diarization_hook(str(filename)) or []  # (start, end, speaker)
        except Exception as e:
            print(f"Diarization error for {filename.name}: {e}")
            diar_segments = []

        # Transcribe chunk and get segments with timestamps
        if transcriber and transcriber.model:
            try:
                segments, _info = transcriber.model.transcribe(str(filename), beam_size=5)
                for seg in segments:
                    seg_start = seg.start
                    seg_end = seg.end
                    seg_text = seg.text.strip()
                    # Speaker of the first diarization turn overlapping this segment
                    speaker = "Unknown"
                    for d_start, d_end, d_speaker in diar_segments:
                        if (seg_start < d_end) and (seg_end > d_start):
                            speaker = d_speaker
                            break
                    line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                    print(line.strip())
                    with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                        tf.write(line)
            except Exception as e:
                print(f"Transcription error for {filename.name}: {e}")
    print("Chunk writer/transcriber worker exiting.")
269
+
270
def run_recording(mic_index, sys_index=None, chunk_secs=CHUNK_DURATION_SECS, model_name=MODEL_NAME, no_transcribe=False):
    """
    Record from the microphone (and optionally a system device) until the
    module-level stop_event is set, then write the final WAV file(s).

    Args:
        mic_index: Input device index for the microphone.
        sys_index: Optional input device index for system audio; when given,
            its chunks are saved but not transcribed, and a merged stereo
            file is written at the end.
        chunk_secs: Chunk length in seconds (stored in CHUNK_DURATION_SECS).
        model_name: Whisper model name (stored in MODEL_NAME).
        no_transcribe: When True no Transcriber is created.

    Blocks the calling thread. A caller may pre-assign the module's
    stop_event; otherwise one is created here. Ctrl+C also sets it.
    """
    global CHUNK_DURATION_SECS, MODEL_NAME, stop_event, audio
    CHUNK_DURATION_SECS = chunk_secs
    MODEL_NAME = model_name

    # Start transcriber if enabled
    transcriber = None if no_transcribe else Transcriber()

    # Queues and threads
    q = queue.Queue()
    final_frames = []
    if 'stop_event' not in globals() or stop_event is None:
        stop_event = threading.Event()

    mic_thread = threading.Thread(target=record_loop, args=(mic_index, q, "mic"), daemon=True)
    mic_thread.start()

    q_sys = None
    writer_thread_sys = None
    final_frames_sys = []

    if sys_index is not None:
        q_sys = queue.Queue()
        sys_thread = threading.Thread(target=record_loop, args=(sys_index, q_sys, "sys"), daemon=True)
        sys_thread.start()
        writer_thread_sys = threading.Thread(target=chunk_writer_and_transcribe_worker, args=(q_sys, final_frames_sys, None, "sys"), daemon=True)
        writer_thread_sys.start()

    writer_thread = threading.Thread(target=chunk_writer_and_transcribe_worker, args=(q, final_frames, transcriber, "mic"), daemon=True)
    writer_thread.start()

    try:
        # wait until the shared stop_event is set by the caller (Flask / api_stop-recording)
        while not stop_event.is_set():
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("\nStopping all threads.")
        stop_event.set()
        time.sleep(1.0)

    writer_thread.join(timeout=5)
    if writer_thread_sys:
        writer_thread_sys.join(timeout=5)

    if final_frames:
        save_wav_from_frames(FINAL_WAV, final_frames, nchannels=CHANNELS)
        print(f"Saved final WAV: {FINAL_WAV}")

    if final_frames and final_frames_sys:
        final_sys_wav = OUTPUT_DIR / "recorded_system_full.wav"
        save_wav_from_frames(final_sys_wav, final_frames_sys, nchannels=CHANNELS)
        stereo_path = OUTPUT_DIR / "recorded_audio_stereo.wav"
        merge_mono_files_to_stereo(FINAL_WAV, final_sys_wav, stereo_path)
        print(f"Saved merged stereo WAV: {stereo_path}")

    # Terminating closes any stream still open on this instance. The
    # module-level instance is then replaced so that run_recording can be
    # called again in the same process (e.g. a second recording started
    # from the web app) without failing to open a stream.
    audio.terminate()
    audio = pyaudio.PyAudio()
    print("Done. Transcript (if any) saved to:", TRANSCRIPT_FILE)
337
+ # Main
338
if __name__ == "__main__":
    # Interactive entry point: show the input devices, let the user pick a
    # microphone (empty input selects the default input device), then record.
    list_input_devices()
    mic_index = input("\nEnter the device index for your microphone (or press ENTER to use default): ").strip()
    mic_index = int(mic_index) if mic_index != "" else pyaudio.PyAudio().get_default_input_device_info()['index']
    run_recording(mic_index)
requirements.txt ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs
2
+ aiohttp
3
+ aioice
4
+ aiortc
5
+ aiosignal
6
+ alembic
7
+ antlr4-python3-runtime
8
+ asteroid-filterbanks
9
+ asttokens
10
+ attrs
11
+ audioread
12
+ av
13
+ bidict
14
+ blinker
15
+ certifi
16
+ cffi
17
+ charset-normalizer
18
+ click
19
+ colorama
20
+ coloredlogs
21
+ colorlog
22
+ comm
23
+ contourpy
24
+ cryptography
25
+ ctranslate2
26
+ cycler
27
+ debugpy
28
+ decorator
29
+ dnspython
30
+ docopt
31
+ einops
32
+ eventlet
33
+ executing
34
+ faster-whisper
35
+ filelock
36
+ Flask
37
+ Flask-SocketIO
38
+ flatbuffers
39
+ fonttools
40
+ frozenlist
41
+ fsspec
42
+ google-crc32c
43
+ greenlet
44
+ h11
45
+ huggingface-hub
46
+ humanfriendly
47
+ HyperPyYAML
48
+ idna
49
+ ifaddr
50
+ ipykernel
51
+ ipython
52
+ ipython_pygments_lexers
53
+ itsdangerous
54
+ jedi
55
+ Jinja2
56
+ joblib
57
+ julius
58
+ jupyter_client
59
+ jupyter_core
60
+ kiwisolver
61
+ lazy_loader
62
+ librosa
63
+ lightning
64
+ lightning-utilities
65
+ llvmlite
66
+ Mako
67
+ markdown-it-py
68
+ MarkupSafe
69
+ matplotlib
70
+ matplotlib-inline
71
+ mdurl
72
+ more-itertools
73
+ mpmath
74
+ msgpack
75
+ multidict
76
+ mypy_extensions
77
+ nest-asyncio
78
+ networkx
79
+ numba
80
+ numpy
81
+ omegaconf
82
+ onnxruntime
83
+ openai-whisper
84
+ optuna
85
+ packaging
86
+ pandas
87
+ parso
88
+ pillow
89
+ platformdirs
90
+ pooch
91
+ primePy
92
+ prompt_toolkit
93
+ propcache
94
+ protobuf
95
+ psutil
96
+ pure_eval
97
+ pyannotate
98
+ pyannote.audio
99
+ pyannote.core
100
+ pyannote.database
101
+ pyannote.metrics
102
+ pyannote.pipeline
103
+ PyAudio
104
+ pycparser
105
+ pydub
106
+ pyee
107
+ Pygments
108
+ pylibsrtp
109
+ pyOpenSSL
110
+ pyparsing
111
+ pyreadline3
112
+ python-dateutil
113
+ python-engineio
114
+ python-socketio
115
+ pytorch-lightning
116
+ pytorch-metric-learning
117
+ pytz
118
+ pywin32; sys_platform == "win32"
119
+ PyYAML
120
+ pyzmq
121
+ regex
122
+ requests
123
+ resampy
124
+ Resemblyzer
125
+ rich
126
+ ruamel.yaml
127
+ ruamel.yaml.clib
128
+ safetensors
129
+ scikit-learn
130
+ scipy
131
+ semver
132
+ sentencepiece
133
+ setuptools
134
+ shellingham
135
+ simple-websocket
136
+ six
137
+ sortedcontainers
138
+ SoundCard
139
+ sounddevice
140
+ soundfile
141
+ soxr
142
+ speechbrain
143
+ SQLAlchemy
144
+ stack-data
145
+ sympy
146
+ tabulate
147
+ tensorboardX
148
+ threadpoolctl
149
+ tiktoken
150
+ tokenizers
151
+ torch
152
+ torch-audiomentations
153
+ torch_pitch_shift
154
+ torchaudio
155
+ torchmetrics
156
+ tornado
157
+ tqdm
158
+ traitlets
159
+ transformers
160
+ typer
161
+ typing
162
+ typing_extensions
163
+ tzdata
164
+ urllib3
165
+ wavio
166
+ wcwidth
167
+ webrtcvad
168
+ websocket-client
169
+ Werkzeug
170
+ wsproto
171
+ yarl
static/icon_upload.png ADDED
templates/index2.html ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <title>Audio Transcription Studio</title>
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
8
+ <!-- <link rel="icon" href="https://lovable.dev/favicon.ico"> -->
9
+ <!-- <link rel="icon" href="https://cdn-icons-png.flaticon.com/512/727/727245.png?v=2"> -->
10
+ <link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
11
+ <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
12
+ <style>
13
+ :root {
14
+ --bg: #18122b;
15
+ --bg-card: #231942;
16
+ --bg-card2: #251e3e;
17
+ --accent: #a259ec;
18
+ --accent2: #2563eb;
19
+ --text: #fff;
20
+ --text-muted: #bcbcbc;
21
+ --border: #312e4a;
22
+ --success: #22c55e;
23
+ --danger: #dc2626;
24
+ --cyan: #00fff7;
25
+ }
26
+
27
+ html,
28
+ body {
29
+ height: 100%;
30
+ margin: 0;
31
+ padding: 0;
32
+ font-family: 'Inter', Arial, sans-serif;
33
+ background: var(--bg);
34
+ color: var(--text);
35
+ }
36
+
37
+ .layout {
38
+ display: flex;
39
+ min-height: 100vh;
40
+ gap: 32px;
41
+ padding: 32px;
42
+ box-sizing: border-box;
43
+ }
44
+
45
+ .main-panel {
46
+ flex: 2;
47
+ display: flex;
48
+ flex-direction: column;
49
+ gap: 24px;
50
+ }
51
+
52
+ .card {
53
+ background: var(--bg-card);
54
+ border-radius: 18px;
55
+ box-shadow: 0 2px 16px #0003;
56
+ padding: 32px 32px 24px 32px;
57
+ margin-bottom: 0;
58
+ border: 1.5px solid var(--border);
59
+ }
60
+
61
+ .card h2,
62
+ .card h3 {
63
+ margin-top: 0;
64
+ color: var(--accent);
65
+ font-size: 1.5em;
66
+ font-weight: 700;
67
+ margin-bottom: 18px;
68
+ letter-spacing: 1px;
69
+ }
70
+
71
+ .sidebar {
72
+ flex: 1;
73
+ min-width: 320px;
74
+ background: var(--bg-card2);
75
+ border-radius: 18px;
76
+ box-shadow: 0 2px 16px #0003;
77
+ padding: 32px 28px 24px 28px;
78
+ display: flex;
79
+ flex-direction: column;
80
+ gap: 32px;
81
+ border: 1.5px solid var(--border);
82
+ height: fit-content;
83
+ }
84
+
85
+ .sidebar h3 {
86
+ color: var(--accent2);
87
+ font-size: 1.2em;
88
+ font-weight: 700;
89
+ margin-bottom: 18px;
90
+ letter-spacing: 1px;
91
+ display: flex;
92
+ align-items: center;
93
+ gap: 8px;
94
+ }
95
+
96
+ .sidebar label {
97
+ font-size: 1em;
98
+ color: var(--text-muted);
99
+ margin-top: 18px;
100
+ font-weight: 600;
101
+ display: flex;
102
+ align-items: center;
103
+ gap: 8px;
104
+ }
105
+
106
+ .sidebar select,
107
+ .sidebar input[type="number"] {
108
+ width: 100%;
109
+ margin-top: 6px;
110
+ padding: 10px;
111
+ border-radius: 8px;
112
+ border: 1px solid var(--border);
113
+ background: #201c3a;
114
+ color: var(--text);
115
+ font-size: 1em;
116
+ margin-bottom: 10px;
117
+ outline: none;
118
+ transition: border 0.2s;
119
+ }
120
+
121
+ .sidebar select:focus,
122
+ .sidebar input[type="number"]:focus {
123
+ border: 1.5px solid var(--accent2);
124
+ }
125
+
126
+ .sidebar button {
127
+ width: 100%;
128
+ padding: 14px 0;
129
+ margin-top: 18px;
130
+ border: none;
131
+ border-radius: 8px;
132
+ background: var(--accent);
133
+ color: #fff;
134
+ font-size: 1.1em;
135
+ font-weight: 600;
136
+ cursor: pointer;
137
+ transition: background 0.2s;
138
+ box-shadow: 0 2px 8px #0002;
139
+ }
140
+
141
+ .sidebar button:disabled {
142
+ background: #a5b4fc;
143
+ cursor: not-allowed;
144
+ }
145
+
146
+ .sidebar .stop-btn {
147
+ background: var(--danger);
148
+ margin-top: 8px;
149
+ }
150
+
151
+ .toggle-row {
152
+ display: flex;
153
+ align-items: center;
154
+ gap: 10px;
155
+ margin-top: 10px;
156
+ }
157
+
158
+ .toggle-label {
159
+ flex: 1;
160
+ color: var(--text-muted);
161
+ font-size: 1em;
162
+ }
163
+
164
+ .toggle-switch {
165
+ width: 38px;
166
+ height: 22px;
167
+ background: #333;
168
+ border-radius: 12px;
169
+ position: relative;
170
+ cursor: pointer;
171
+ transition: background 0.2s;
172
+ }
173
+
174
+ .toggle-switch input {
175
+ display: none;
176
+ }
177
+
178
+ .toggle-slider {
179
+ position: absolute;
180
+ top: 2px;
181
+ left: 2px;
182
+ width: 18px;
183
+ height: 18px;
184
+ background: var(--accent2);
185
+ border-radius: 50%;
186
+ transition: left 0.2s;
187
+ }
188
+
189
+ .toggle-switch input:checked+.toggle-slider {
190
+ left: 18px;
191
+ background: var(--danger);
192
+ }
193
+
194
+ .status {
195
+ margin: 18px 0 0 0;
196
+ font-weight: bold;
197
+ color: var(--success);
198
+ font-size: 1.1em;
199
+ text-align: center;
200
+ }
201
+
202
+ .recorder-center {
203
+ display: flex;
204
+ flex-direction: column;
205
+ align-items: center;
206
+ gap: 18px;
207
+ margin-bottom: 18px;
208
+ }
209
+
210
+ .recorder-btn {
211
+ width: 90px;
212
+ height: 90px;
213
+ border-radius: 50%;
214
+ background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
215
+ display: flex;
216
+ align-items: center;
217
+ justify-content: center;
218
+ box-shadow: 0 0 32px #a259ec55;
219
+ cursor: pointer;
220
+ transition: box-shadow 0.2s, background 0.2s;
221
+ position: relative;
222
+ }
223
+
224
+ .recorder-btn.recording {
225
+ background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
226
+ box-shadow: 0 0 32px #dc262655;
227
+ animation: pulse 1.2s infinite;
228
+ }
229
+
230
+ @keyframes pulse {
231
+ 0% {
232
+ box-shadow: 0 0 32px #dc262655;
233
+ }
234
+
235
+ 50% {
236
+ box-shadow: 0 0 48px #dc2626aa;
237
+ }
238
+
239
+ 100% {
240
+ box-shadow: 0 0 32px #dc262655;
241
+ }
242
+ }
243
+
244
+ .recorder-btn svg {
245
+ width: 38px;
246
+ height: 38px;
247
+ color: #fff;
248
+ }
249
+
250
+ .recorder-status {
251
+ color: var(--success);
252
+ font-size: 1.1em;
253
+ font-weight: 600;
254
+ margin-top: 8px;
255
+ }
256
+
257
+ .recorder-status.recording {
258
+ color: var(--danger);
259
+ }
260
+
261
+ .live {
262
+ margin-top: 0;
263
+ background: #201c3a;
264
+ border-radius: 12px;
265
+ padding: 18px 18px 10px 18px;
266
+ min-height: 90px;
267
+ border: 1px solid var(--border);
268
+ overflow: hidden;
269
+ /* hide outer overflow, inner #live will scroll */
270
+ display: flex;
271
+ flex-direction: column;
272
+ }
273
+
274
+ /* inner container which actually scrolls */
275
+ #live {
276
+ flex: 1 1 auto;
277
+ overflow-y: auto;
278
+ padding-right: 6px;
279
+ /* give room for scroll bar */
280
+ -webkit-overflow-scrolling: touch;
281
+ scroll-behavior: smooth;
282
+ color: var(--text-muted);
283
+ }
284
+
285
+ .live h4 {
286
+ margin: 0 0 10px 0;
287
+ color: var(--cyan);
288
+ font-size: 1.08em;
289
+ font-weight: 600;
290
+ display: flex;
291
+ align-items: center;
292
+ gap: 8px;
293
+ }
294
+
295
+ .chunk {
296
+ background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
297
+ margin-bottom: 8px;
298
+ padding: 10px 12px;
299
+ border-radius: 8px;
300
+ font-size: 0.98em;
301
+ color: var(--text);
302
+ box-shadow: 0 1px 2px #0002;
303
+ border: 1px solid rgba(255, 255, 255, 0.02);
304
+ }
305
+
306
+ /* Small speaker label */
307
+ .chunk b {
308
+ color: var(--cyan);
309
+ margin-right: 6px;
310
+ font-weight: 700;
311
+ }
312
+
313
+ /* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
314
+ #live::-webkit-scrollbar {
315
+ width: 10px;
316
+ }
317
+
318
+ #live::-webkit-scrollbar-track {
319
+ background: rgba(255, 255, 255, 0.02);
320
+ border-radius: 10px;
321
+ }
322
+
323
+ #live::-webkit-scrollbar-thumb {
324
+ background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
325
+ border-radius: 10px;
326
+ border: 2px solid rgba(0, 0, 0, 0.15);
327
+ }
328
+
329
+ #live::-webkit-scrollbar-thumb:hover {
330
+ filter: brightness(0.95);
331
+ }
332
+
333
+ /* THEMED SCROLLBAR - Firefox */
334
+ #live {
335
+ scrollbar-width: thin;
336
+ scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
337
+ }
338
+
339
+ /* responsive: reduce max-height on small screens */
340
+ @media (max-width: 700px) {
341
+ .live {
342
+ max-height: 200px;
343
+ }
344
+ }
345
+
346
+ .files h4 {
347
+ color: var(--accent2);
348
+ font-size: 1.08em;
349
+ margin: 0 0 10px 0;
350
+ font-weight: 600;
351
+ display: flex;
352
+ align-items: center;
353
+ gap: 8px;
354
+ }
355
+
356
+ .file {
357
+ background: #2d254a;
358
+ margin-bottom: 8px;
359
+ padding: 8px 12px;
360
+ border-radius: 5px;
361
+ font-size: 1em;
362
+ color: #e0e7ef;
363
+ display: flex;
364
+ align-items: center;
365
+ justify-content: space-between;
366
+ box-shadow: 0 1px 2px #0001;
367
+ }
368
+
369
+ .file a {
370
+ color: var(--accent2);
371
+ text-decoration: none;
372
+ font-weight: 500;
373
+ }
374
+
375
+ .file a:hover {
376
+ text-decoration: underline;
377
+ }
378
+
379
+ #audio-player-container {
380
+ margin-bottom: 18px;
381
+ }
382
+
383
+ #waveform {
384
+ width: 100%;
385
+ height: 80px;
386
+ background: #2d254a;
387
+ border-radius: 6px;
388
+ }
389
+
390
+ #transcript-container {
391
+ background: #2d254a;
392
+ padding: 14px;
393
+ border-radius: 6px;
394
+ margin-top: 24px;
395
+ }
396
+
397
+ #transcript-content {
398
+ margin-top: 10px;
399
+ white-space: pre-wrap;
400
+ font-size: 1em;
401
+ color: #e0e7ef;
402
+ max-height: 300px;
403
+ overflow: auto;
404
+ background: #201c3a;
405
+ padding: 10px;
406
+ border-radius: 4px;
407
+ }
408
+
409
+ @media (max-width: 1100px) {
410
+ .layout {
411
+ flex-direction: column;
412
+ gap: 0;
413
+ padding: 12px;
414
+ }
415
+
416
+ .sidebar {
417
+ min-width: unset;
418
+ width: 100%;
419
+ margin-bottom: 18px;
420
+ }
421
+
422
+ .main-panel {
423
+ padding: 0;
424
+ }
425
+ }
426
+
427
+ @media (max-width: 700px) {
428
+
429
+ .card,
430
+ .sidebar {
431
+ padding: 16px 8px 12px 8px;
432
+ }
433
+
434
+ .main-panel {
435
+ gap: 12px;
436
+ }
437
+ }
438
+ </style>
439
+ </head>
440
+
441
+ <body>
442
+ <div class="layout">
443
+ <main class="main-panel">
444
+ <section class="card">
445
+ <h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio</h2>
446
+ <div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
447
+ Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
448
+ </div>
449
+ <div class="recorder-center">
450
+ <div id="recorderBtn" class="recorder-btn" title="Start/Stop Recording">
451
+ <svg id="micIcon" xmlns="http://www.w3.org/2000/svg" width="38" height="38" viewBox="0 0 24 24" fill="none"
452
+ stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
453
+ class="lucide lucide-mic-icon lucide-mic">
454
+ <path d="M12 19v3" />
455
+ <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
456
+ <rect x="9" y="2" width="6" height="13" rx="3" />
457
+ </svg>
458
+ <svg id="stopIcon" style="display:none;" xmlns="http://www.w3.org/2000/svg" fill="currentColor"
459
+ viewBox="0 0 24 24">
460
+ <rect x="6" y="6" width="12" height="12" rx="2" />
461
+ </svg>
462
+ </div>
463
+ <div id="recorderStatus" class="recorder-status">Ready to record</div>
464
+ </div>
465
+ </section>
466
+
467
+ <section class="card">
468
+ <h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
469
+ <div class="live">
470
+ <div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription</div>
471
+ </div>
472
+ </section>
473
+ <!-- <section class="card files">
474
+ <h4><span style="color:var(--accent2);">📁</span> Recording Files</h4>
475
+ <div id="audio-player-container"></div>
476
+ <div id="transcript-container"></div>
477
+ <div id="files"></div>
478
+ </section> -->
479
+ </main>
480
+ <aside class="sidebar">
481
+ <h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
482
+ <label for="mic">Microphone Device</label>
483
+ <select id="mic" disabled>
484
+ <option value="1" selected>Default Microphone (#1)</option>
485
+ </select>
486
+ <label for="sys">System Audio (Optional)</label>
487
+ <select id="sys" disabled>
488
+ <option value="16" selected>System Loopback (#16)</option>
489
+ </select>
490
+ <label for="chunk_secs">Chunk Length (seconds)</label>
491
+ <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
492
+ <label for="model">Transcription Model</label>
493
+ <select id="model" disabled>
494
+ <option value="small">Small (Fast)</option>
495
+ <option value="medium" selected>Medium (Balanced)</option>
496
+ <option value="large">Large (Accurate)</option>
497
+ </select>
498
+ <div class="toggle-row">
499
+ <span class="toggle-label">Disable Transcription</span>
500
+ <label class="toggle-switch">
501
+ <input type="checkbox" id="no_transcribe">
502
+ <span class="toggle-slider"></span>
503
+ </label>
504
+ </div>
505
+ <div class="status" id="status"></div>
506
+ </aside>
507
+ </div>
508
+ <script>
509
// --- Recording button: cached DOM handles and shared state ---
const recorderBtn = document.getElementById('recorderBtn');
const recorderStatus = document.getElementById('recorderStatus');
const micIcon = document.getElementById('micIcon');
const stopIcon = document.getElementById('stopIcon');
// The big round button doubles as the start control.
const startBtn = recorderBtn;

// Last recording state applied to the UI (written by setRecordingUI).
let isRecording = false;
// Handle of the status-polling interval created by pollStatus(), or null.
let polling = null;
517
+
518
/**
 * Switch the round recorder button, its icons and the status line between
 * the idle look and the recording look, and store the value in `isRecording`.
 *
 * @param {boolean} recording - truthy shows the recording state, falsy the idle state.
 */
function setRecordingUI(recording) {
  isRecording = recording;

  // Explicit boolean: passing `undefined` as the force flag would make
  // classList.toggle() flip the class instead of removing it.
  const active = Boolean(recording);

  recorderBtn.classList.toggle('recording', active);
  recorderStatus.classList.toggle('recording', active);
  micIcon.style.display = active ? 'none' : '';
  stopIcon.style.display = active ? '' : 'none';
  recorderStatus.textContent = active ? 'Recording...' : 'Ready to record';
}
534
+
535
// Clicking the round button starts a recording when idle and stops it otherwise.
recorderBtn.onclick = async function () {
  const action = isRecording ? stopRecording : startRecording;
  await action();
};
542
+
543
/**
 * Ask the backend to start recording and, on success, switch the UI to the
 * recording state and begin polling for status.
 *
 * The request uses fixed settings (mic 1, sys 16, 5 second chunks, "medium"
 * model); only the "Disable Transcription" checkbox is read from the page.
 * Failures are reported in the #status element and leave the UI idle.
 */
async function startRecording() {
  const statusEl = document.getElementById('status');
  const payload = {
    mic: 1,
    sys: 16,
    chunk_secs: 5,
    model: "medium",
    no_transcribe: document.getElementById('no_transcribe').checked
  };

  // Write a message and its colour to the status line in one step.
  const showStatus = (message, color) => {
    statusEl.textContent = message;
    statusEl.style.color = color;
  };

  // Immediate feedback while the request is in flight.
  showStatus('Starting...', 'var(--accent2)');

  try {
    const resp = await fetch('/api/start-recording', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload)
    });

    if (resp.ok) {
      // Success: recording colour, recording UI, then start polling.
      showStatus('Recording...', 'var(--danger)');
      setRecordingUI(true);
      pollStatus();
      return;
    }

    // Prefer the backend's { error: "..." } text; fall back to the HTTP status.
    let errMsg = `Failed to start recording (${resp.status})`;
    try {
      const details = await resp.json();
      if (details && details.error) errMsg = details.error;
    } catch (parseErr) {
      // Body was not JSON: keep the fallback message.
    }
    showStatus(errMsg, 'var(--danger)');
    setRecordingUI(false);
  } catch (err) {
    // Network failure or an unexpected error while starting.
    showStatus('Network error: could not start recording', 'var(--danger)');
    setRecordingUI(false);
    console.error("startRecording error:", err);
  }
}
597
+
598
/**
 * Ask the backend to stop the current recording.
 *
 * "Stopping..." is shown before the request is sent so the click gets
 * immediate feedback. On success the UI returns to idle, status polling is
 * cancelled and the file list is refreshed after a 2 second delay. If the
 * request fails (network error or non-OK response) the error is shown and
 * the recording UI and polling are left untouched, so a running poll can
 * bring the UI back in line with the backend.
 */
async function stopRecording() {
  const statusEl = document.getElementById('status');
  statusEl.textContent = 'Stopping...';

  try {
    const resp = await fetch('/api/stop-recording', { method: 'POST' });
    if (!resp.ok) {
      throw new Error(`stop-recording responded with ${resp.status}`);
    }
  } catch (err) {
    statusEl.textContent = 'Could not stop recording';
    statusEl.style.color = 'var(--danger)';
    console.error("stopRecording error:", err);
    return;
  }

  setRecordingUI(false);
  if (polling) {
    clearInterval(polling);
    polling = null; // do not keep a stale interval handle around
  }
  // Refresh the file list once the backend has had a moment to finish up.
  setTimeout(() => { loadFiles(); }, 2000);
}
605
+
606
+ // --- Poll status ---
607
/**
 * Poll /api/recording-status once per second, mirror the reported state in
 * the UI and render the ten most recent live transcription segments.
 *
 * - Any interval left over from an earlier call is cancelled first, so only
 *   one poller runs at a time.
 * - Segment fields are inserted as text nodes, never parsed as HTML, because
 *   speaker labels and transcript text come from the server.
 * - A failed request or unparsable response is logged and the tick skipped;
 *   polling continues.
 * - When the backend reports that recording ended, polling stops and the
 *   file list is refreshed.
 */
function pollStatus() {
  if (polling) clearInterval(polling);

  const timer = setInterval(async () => {
    let data;
    try {
      const res = await fetch('/api/recording-status');
      data = await res.json();
    } catch (err) {
      console.error("pollStatus error:", err);
      return; // transient failure: try again on the next tick
    }

    setRecordingUI(data.recording);

    // --- Show live transcription ---
    const liveDiv = document.getElementById('live');
    liveDiv.innerHTML = '';
    if (data.live_segments && data.live_segments.length) {
      data.live_segments.slice(-10).forEach(seg => {
        const row = document.createElement('div');
        row.className = 'chunk';

        const speaker = document.createElement('b');
        speaker.textContent = `${seg.speaker || 'Speaker'}:`;

        // Strings passed to append() become text nodes, so markup in the
        // transcript is displayed literally.
        row.append(
          speaker,
          ` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.text}`
        );
        liveDiv.appendChild(row);
      });
      // Scroll to the newest segment after the browser has laid out the rows.
      requestAnimationFrame(() => {
        liveDiv.scrollTop = liveDiv.scrollHeight;
      });
    } else {
      liveDiv.textContent = 'No Transcription Yet...';
    }

    if (!data.recording) {
      clearInterval(timer);
      if (polling === timer) polling = null;
      setRecordingUI(false);
      loadFiles();
    }
  }, 1000);

  polling = timer;
}
637
/**
 * Format a duration in seconds as "m:ss" (minutes are not capped at 59).
 *
 * Missing, non-numeric, non-finite or negative input yields "0:00" rather
 * than output such as "NaN:NaN" or "-1:-5".
 *
 * @param {number|string|null|undefined} s - duration in seconds.
 * @returns {string} the formatted time, e.g. 65 -> "1:05".
 */
function formatTime(s) {
  if (s == null) return "0:00";

  const total = Number(s);
  if (!Number.isFinite(total) || total < 0) return "0:00";

  const mm = Math.floor(total / 60);
  const ss = Math.floor(total % 60).toString().padStart(2, "0");
  return `${mm}:${ss}`;
}
644
+
645
+ // --- Load final files and display audio player and transcript ---
646
/**
 * Refresh the "Recording Files" area from /api/final-files.
 *
 * Shows an audio player (with a wavesurfer.js waveform when that library is
 * loaded) for the recorded_audio_*.wav entry, the text of the
 * transcript_*.txt entry, and a download row for every other file.
 *
 * Does nothing when the three target containers are not in the document;
 * in this template that section is inside an HTML comment. File names and
 * URLs from the server are inserted through DOM properties, never parsed as
 * HTML.
 */
async function loadFiles() {
  const filesDiv = document.getElementById('files');
  const audioPlayerDiv = document.getElementById('audio-player-container');
  const transcriptDiv = document.getElementById('transcript-container');

  // Without the containers there is nowhere to render; bail out instead of
  // throwing on a null element.
  if (!filesDiv || !audioPlayerDiv || !transcriptDiv) return;

  filesDiv.innerHTML = '';
  audioPlayerDiv.innerHTML = '';
  transcriptDiv.innerHTML = '';

  try {
    const res = await fetch('/api/final-files');
    const data = await res.json();
    if (!data.files.length) {
      filesDiv.textContent = 'No files yet.';
      return;
    }

    // Keep the LAST entry matching each pattern, in the order the server sent.
    // NOTE(review): that is the newest run only if the backend lists files
    // oldest-first. Confirm against /api/final-files.
    let audioFile = null, transcriptFile = null;
    data.files.forEach(f => {
      if (/^recorded_audio_.*\.wav$/.test(f.name)) audioFile = f;
      if (/^transcript_.*\.txt$/.test(f.name)) transcriptFile = f;
    });

    // Audio player with waveform (wavesurfer.js if available, else a notice).
    if (audioFile) {
      const audioSrc = audioFile.url || audioFile.path;

      const title = document.createElement('div');
      title.style.marginBottom = '12px';
      const titleText = document.createElement('b');
      titleText.textContent = audioFile.name;
      title.appendChild(titleText);

      const waveform = document.createElement('div');
      waveform.id = 'waveform';
      waveform.style.cssText = 'width:100%;height:80px;background:#2d254a;border-radius:6px;';

      const audioElem = document.createElement('audio');
      audioElem.id = 'audio-player';
      audioElem.controls = true;
      audioElem.style.cssText = 'width:100%;margin-top:8px;';
      const source = document.createElement('source');
      source.src = audioSrc;
      source.type = 'audio/wav';
      audioElem.append(source, 'Your browser does not support the audio element.');

      audioPlayerDiv.append(title, waveform, audioElem);

      if (window.WaveSurfer) {
        // NOTE(review): the `responsive` option and the 'seek' event belong to
        // older wavesurfer.js releases, while this page loads the library from
        // an unversioned CDN URL. Confirm which major version is served.
        // NOTE(review): wavesurfer.play() alongside the <audio> element may
        // play the sound twice. Verify.
        const wavesurfer = WaveSurfer.create({
          container: '#waveform',
          waveColor: '#a259ec',
          progressColor: '#2563eb',
          height: 80,
          barWidth: 2,
          responsive: true,
          cursorColor: '#dc2626'
        });
        wavesurfer.load(audioSrc);

        // Keep the waveform and the audio element in step.
        audioElem.addEventListener('play', () => wavesurfer.play());
        audioElem.addEventListener('pause', () => wavesurfer.pause());
        wavesurfer.on('seek', (progress) => {
          // duration is NaN until metadata loads; assigning a non-finite
          // currentTime throws.
          if (Number.isFinite(audioElem.duration)) {
            audioElem.currentTime = progress * audioElem.duration;
          }
        });
        audioElem.addEventListener('timeupdate', () => {
          if (!audioElem.paused && audioElem.duration > 0 && Number.isFinite(audioElem.duration)) {
            wavesurfer.seekTo(audioElem.currentTime / audioElem.duration);
          }
        });
      } else {
        waveform.innerHTML = '<div style="color:#64748b;text-align:center;padding-top:28px;">(Waveform preview requires wavesurfer.js)</div>';
      }
    }

    // Transcript file content.
    if (transcriptFile) {
      const transcriptTitle = document.createElement('b');
      transcriptTitle.textContent = transcriptFile.name;
      const transcriptPre = document.createElement('pre');
      transcriptPre.id = 'transcript-content';
      transcriptDiv.append(transcriptTitle, transcriptPre);

      // Loaded in the background; the file rows below do not wait for it.
      fetch(transcriptFile.url || transcriptFile.path)
        .then(r => r.text())
        .then(txt => {
          transcriptPre.textContent = txt;
        })
        .catch(err => {
          transcriptPre.textContent = 'Error loading transcript.';
          console.error("loadFiles transcript error:", err);
        });
    }

    // Download rows for every remaining file.
    data.files.forEach(f => {
      if (
        (audioFile && f.name === audioFile.name) ||
        (transcriptFile && f.name === transcriptFile.name)
      ) return;

      const row = document.createElement('div');
      row.className = 'file';

      const label = document.createElement('span');
      label.textContent = f.name;

      const link = document.createElement('a');
      link.href = f.url || f.path;
      link.target = '_blank';
      link.rel = 'noopener';
      link.textContent = 'Download';

      row.append(label, ' ', link);
      filesDiv.appendChild(row);
    });
  } catch (e) {
    filesDiv.textContent = 'Error loading files.';
    console.error("loadFiles error:", e);
  }
}
739
+
740
// --- On load: show whatever files already exist ---
loadFiles();

// Fetch wavesurfer.js from the CDN when the page has not provided it already.
if (!window.WaveSurfer) {
  const loader = document.createElement('script');
  loader.onload = () => { /* nothing to do here: a later loadFiles() call uses it */ };
  loader.src = "https://unpkg.com/wavesurfer.js";
  document.head.appendChild(loader);
}
750
+ </script>
751
+ </body>
752
+
753
+ </html>
templates/index2_upload.html ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <title>Audio Transcription Studio</title>
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
8
+ <link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
9
+ <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
10
+ <style>
11
+ :root {
12
+ --bg: #18122b;
13
+ --bg-card: #231942;
14
+ --bg-card2: #251e3e;
15
+ --accent: #a259ec;
16
+ --accent2: #2563eb;
17
+ --text: #fff;
18
+ --text-muted: #bcbcbc;
19
+ --border: #312e4a;
20
+ --success: #22c55e;
21
+ --danger: #dc2626;
22
+ --cyan: #00fff7;
23
+ }
24
+
25
+ html,
26
+ body {
27
+ height: 100%;
28
+ margin: 0;
29
+ padding: 0;
30
+ font-family: 'Inter', Arial, sans-serif;
31
+ background: var(--bg);
32
+ color: var(--text);
33
+ }
34
+
35
+ .layout {
36
+ display: flex;
37
+ min-height: 100vh;
38
+ gap: 32px;
39
+ padding: 32px;
40
+ box-sizing: border-box;
41
+ }
42
+
43
+ .main-panel {
44
+ flex: 2;
45
+ display: flex;
46
+ flex-direction: column;
47
+ gap: 24px;
48
+ }
49
+
50
+ .card {
51
+ background: var(--bg-card);
52
+ border-radius: 18px;
53
+ box-shadow: 0 2px 16px #0003;
54
+ padding: 32px 32px 24px 32px;
55
+ margin-bottom: 0;
56
+ border: 1.5px solid var(--border);
57
+ }
58
+
59
+ .card h2,
60
+ .card h3 {
61
+ margin-top: 0;
62
+ color: var(--accent);
63
+ font-size: 1.5em;
64
+ font-weight: 700;
65
+ margin-bottom: 18px;
66
+ letter-spacing: 1px;
67
+ }
68
+
69
+ .sidebar {
70
+ flex: 1;
71
+ min-width: 320px;
72
+ background: var(--bg-card2);
73
+ border-radius: 18px;
74
+ box-shadow: 0 2px 16px #0003;
75
+ padding: 32px 28px 24px 28px;
76
+ display: flex;
77
+ flex-direction: column;
78
+ gap: 32px;
79
+ border: 1.5px solid var(--border);
80
+ height: fit-content;
81
+ }
82
+
83
+ .sidebar h3 {
84
+ color: var(--accent2);
85
+ font-size: 1.2em;
86
+ font-weight: 700;
87
+ margin-bottom: 18px;
88
+ letter-spacing: 1px;
89
+ display: flex;
90
+ align-items: center;
91
+ gap: 8px;
92
+ }
93
+
94
+ .sidebar label {
95
+ font-size: 1em;
96
+ color: var(--text-muted);
97
+ margin-top: 18px;
98
+ font-weight: 600;
99
+ display: flex;
100
+ align-items: center;
101
+ gap: 8px;
102
+ }
103
+
104
+ .sidebar select,
105
+ .sidebar input[type="number"] {
106
+ width: 100%;
107
+ margin-top: 6px;
108
+ padding: 10px;
109
+ border-radius: 8px;
110
+ border: 1px solid var(--border);
111
+ background: #201c3a;
112
+ color: var(--text);
113
+ font-size: 1em;
114
+ margin-bottom: 10px;
115
+ outline: none;
116
+ transition: border 0.2s;
117
+ }
118
+
119
+ .sidebar select:focus,
120
+ .sidebar input[type="number"]:focus {
121
+ border: 1.5px solid var(--accent2);
122
+ }
123
+
124
+ .sidebar button {
125
+ width: 100%;
126
+ padding: 14px 0;
127
+ margin-top: 18px;
128
+ border: none;
129
+ border-radius: 8px;
130
+ background: var(--accent);
131
+ color: #fff;
132
+ font-size: 1.1em;
133
+ font-weight: 600;
134
+ cursor: pointer;
135
+ transition: background 0.2s;
136
+ box-shadow: 0 2px 8px #0002;
137
+ }
138
+
139
+ .sidebar button:disabled {
140
+ background: #a5b4fc;
141
+ cursor: not-allowed;
142
+ }
143
+
144
+ .sidebar .stop-btn {
145
+ background: var(--danger);
146
+ margin-top: 8px;
147
+ }
148
+
149
+ .toggle-row {
150
+ display: flex;
151
+ align-items: center;
152
+ gap: 10px;
153
+ margin-top: 10px;
154
+ }
155
+
156
+ .toggle-label {
157
+ flex: 1;
158
+ color: var(--text-muted);
159
+ font-size: 1em;
160
+ }
161
+
162
+ .toggle-switch {
163
+ width: 38px;
164
+ height: 22px;
165
+ background: #333;
166
+ border-radius: 12px;
167
+ position: relative;
168
+ cursor: pointer;
169
+ transition: background 0.2s;
170
+ }
171
+
172
+ .toggle-switch input {
173
+ display: none;
174
+ }
175
+
176
+ .toggle-slider {
177
+ position: absolute;
178
+ top: 2px;
179
+ left: 2px;
180
+ width: 18px;
181
+ height: 18px;
182
+ background: var(--accent2);
183
+ border-radius: 50%;
184
+ transition: left 0.2s;
185
+ }
186
+
187
+ .toggle-switch input:checked+.toggle-slider {
188
+ left: 18px;
189
+ background: var(--danger);
190
+ }
191
+
192
+ .status {
193
+ margin: 18px 0 0 0;
194
+ font-weight: bold;
195
+ color: var(--success);
196
+ font-size: 1.1em;
197
+ text-align: center;
198
+ }
199
+
200
+ .recorder-center {
201
+ display: flex;
202
+ flex-direction: column;
203
+ align-items: center;
204
+ gap: 18px;
205
+ margin-bottom: 18px;
206
+ }
207
+
208
+ .recorder-btn {
209
+ width: 90px;
210
+ height: 90px;
211
+ border-radius: 50%;
212
+ background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
213
+ display: flex;
214
+ align-items: center;
215
+ justify-content: center;
216
+ box-shadow: 0 0 32px #a259ec55;
217
+ cursor: pointer;
218
+ transition: box-shadow 0.2s, background 0.2s;
219
+ position: relative;
220
+ }
221
+
222
+ .recorder-btn.recording {
223
+ background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
224
+ box-shadow: 0 0 32px #dc262655;
225
+ animation: pulse 1.2s infinite;
226
+ }
227
+
228
+ @keyframes pulse {
229
+ 0% {
230
+ box-shadow: 0 0 32px #dc262655;
231
+ }
232
+
233
+ 50% {
234
+ box-shadow: 0 0 48px #dc2626aa;
235
+ }
236
+
237
+ 100% {
238
+ box-shadow: 0 0 32px #dc262655;
239
+ }
240
+ }
241
+
242
+ .recorder-btn svg {
243
+ width: 38px;
244
+ height: 38px;
245
+ color: #fff;
246
+ }
247
+
248
+ .recorder-status {
249
+ color: var(--success);
250
+ font-size: 1.1em;
251
+ font-weight: 600;
252
+ margin-top: 8px;
253
+ }
254
+
255
+ .recorder-status.recording {
256
+ color: var(--danger);
257
+ }
258
+
259
+ .live {
260
+ margin-top: 0;
261
+ background: #201c3a;
262
+ border-radius: 12px;
263
+ padding: 18px 18px 10px 18px;
264
+ min-height: 90px;
265
+ border: 1px solid var(--border);
266
+ overflow: hidden;
267
+ display: flex;
268
+ flex-direction: column;
269
+ }
270
+
271
+ /* inner container which actually scrolls */
272
+ #live {
273
+ flex: 1 1 auto;
274
+ overflow-y: auto;
275
+ padding-right: 6px;
276
+ -webkit-overflow-scrolling: touch;
277
+ scroll-behavior: smooth;
278
+ color: var(--text-muted);
279
+ }
280
+
281
+ .live h4 {
282
+ margin: 0 0 10px 0;
283
+ color: var(--cyan);
284
+ font-size: 1.08em;
285
+ font-weight: 600;
286
+ display: flex;
287
+ align-items: center;
288
+ gap: 8px;
289
+ }
290
+
291
+ .chunk {
292
+ background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
293
+ margin-bottom: 8px;
294
+ padding: 10px 12px;
295
+ border-radius: 8px;
296
+ font-size: 0.98em;
297
+ color: var(--text);
298
+ box-shadow: 0 1px 2px #0002;
299
+ border: 1px solid rgba(255, 255, 255, 0.02);
300
+ }
301
+
302
+ .chunk b {
303
+ color: var(--cyan);
304
+ margin-right: 6px;
305
+ font-weight: 700;
306
+ }
307
+
308
+ /* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
309
+ #live::-webkit-scrollbar {
310
+ width: 10px;
311
+ }
312
+
313
+ #live::-webkit-scrollbar-track {
314
+ background: rgba(255, 255, 255, 0.02);
315
+ border-radius: 10px;
316
+ }
317
+
318
+ #live::-webkit-scrollbar-thumb {
319
+ background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
320
+ border-radius: 10px;
321
+ border: 2px solid rgba(0, 0, 0, 0.15);
322
+ }
323
+
324
+ #live::-webkit-scrollbar-thumb:hover {
325
+ filter: brightness(0.95);
326
+ }
327
+
328
+ #live {
329
+ scrollbar-width: thin;
330
+ scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
331
+ }
332
+
333
+ @media (max-width: 700px) {
334
+ .live {
335
+ max-height: 200px;
336
+ }
337
+ }
338
+
339
+ .files h4 {
340
+ color: var(--accent2);
341
+ font-size: 1.08em;
342
+ margin: 0 0 10px 0;
343
+ font-weight: 600;
344
+ display: flex;
345
+ align-items: center;
346
+ gap: 8px;
347
+ }
348
+
349
+ .file {
350
+ background: #2d254a;
351
+ margin-bottom: 8px;
352
+ padding: 8px 12px;
353
+ border-radius: 5px;
354
+ font-size: 1em;
355
+ color: #e0e7ef;
356
+ display: flex;
357
+ align-items: center;
358
+ justify-content: space-between;
359
+ box-shadow: 0 1px 2px #0001;
360
+ }
361
+
362
+ .file a {
363
+ color: var(--accent2);
364
+ text-decoration: none;
365
+ font-weight: 500;
366
+ }
367
+
368
+ .file a:hover {
369
+ text-decoration: underline;
370
+ }
371
+
372
+ #audio-player-container {
373
+ margin-bottom: 18px;
374
+ }
375
+
376
+ #waveform {
377
+ width: 100%;
378
+ height: 80px;
379
+ background: #2d254a;
380
+ border-radius: 6px;
381
+ }
382
+
383
+ #transcript-container {
384
+ background: #2d254a;
385
+ padding: 14px;
386
+ border-radius: 6px;
387
+ margin-top: 24px;
388
+ }
389
+
390
+ #transcript-content {
391
+ margin-top: 10px;
392
+ white-space: pre-wrap;
393
+ font-size: 1em;
394
+ color: #e0e7ef;
395
+ max-height: 300px;
396
+ overflow: auto;
397
+ background: #201c3a;
398
+ padding: 10px;
399
+ border-radius: 4px;
400
+ }
401
+
402
+ @media (max-width: 1100px) {
403
+ .layout {
404
+ flex-direction: column;
405
+ gap: 0;
406
+ padding: 12px;
407
+ }
408
+
409
+ .sidebar {
410
+ min-width: unset;
411
+ width: 100%;
412
+ margin-bottom: 18px;
413
+ }
414
+
415
+ .main-panel {
416
+ padding: 0;
417
+ }
418
+ }
419
+
420
+ @media (max-width: 700px) {
421
+
422
+ .card,
423
+ .sidebar {
424
+ padding: 16px 8px 12px 8px;
425
+ }
426
+
427
+ .main-panel {
428
+ gap: 12px;
429
+ }
430
+ }
431
+
432
+ /* UPLOAD area styles */
433
+ .upload {
434
+ display: flex;
435
+ flex-direction: column;
436
+ align-items: center;
437
+ gap: 10px;
438
+ padding: 18px 22px;
439
+ border-radius: 12px;
440
+ background: rgba(255, 255, 255, 0.02);
441
+ border: 1px solid rgba(255, 255, 255, 0.03);
442
+ cursor: default;
443
+ width: 100%;
444
+ max-width: 420px;
445
+ margin: 0 auto;
446
+ }
447
+
448
+ .upload-btn {
449
+ display: flex;
450
+ flex-direction: column;
451
+ align-items: center;
452
+ gap: 10px;
453
+ cursor: pointer;
454
+ outline: none;
455
+ user-select: none;
456
+ }
457
+
458
+ .upload-btn:focus-visible {
459
+ box-shadow: 0 0 0 8px rgba(37, 99, 235, 0.08);
460
+ border-radius: 12px;
461
+ }
462
+
463
+ .upload-img {
464
+ width: 120px;
465
+ height: 96px;
466
+ object-fit: contain;
467
+ display: block;
468
+ user-select: none;
469
+ pointer-events: none;
470
+ }
471
+
472
+ .upload-text {
473
+ color: #bcbcbc;
474
+ font-weight: 700;
475
+ font-size: 14px;
476
+ text-align: center;
477
+ max-width: 220px;
478
+ word-break: break-word;
479
+ }
480
+
481
+ /* preview area inside upload container */
482
+ .upload-preview {
483
+ width: 100%;
484
+ display: flex;
485
+ flex-direction: column;
486
+ gap: 10px;
487
+ align-items: center;
488
+ margin-top: 6px;
489
+ }
490
+
491
+ .upload-preview .filename {
492
+ color: var(--text);
493
+ font-weight: 600;
494
+ font-size: 0.95em;
495
+ text-align: center;
496
+ white-space: nowrap;
497
+ overflow: hidden;
498
+ text-overflow: ellipsis;
499
+ max-width: 100%;
500
+ }
501
+
502
+ .upload-preview .controls {
503
+ display: flex;
504
+ gap: 8px;
505
+ align-items: center;
506
+ }
507
+
508
+ .btn-small {
509
+ padding: 6px 10px;
510
+ border-radius: 6px;
511
+ background: #2d254a;
512
+ color: var(--text);
513
+ border: 1px solid rgba(255, 255, 255, 0.03);
514
+ cursor: pointer;
515
+ font-weight: 600;
516
+ }
517
+ </style>
518
+ </head>
519
+
520
+ <body>
521
+ <div class="layout">
522
+ <main class="main-panel">
523
+ <section class="card">
524
+ <h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio
525
+ </h2>
526
+ <div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
527
+ Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
528
+ </div>
529
+ <div class="upload">
530
+
531
+ <label for="uploadFile" class="upload-btn" tabindex="0" role="button"
532
+ aria-label="Upload audio file">
533
+ <img class="upload-img" src="/static/icon_upload.png" alt="Upload icon" />
534
+ <small class="upload-text">Upload .mp3, .wav file</small>
535
+ </label>
536
+
537
+
538
+ <input id="uploadFile" type="file" accept=".mp3,.wav,audio/*" style="display:none" />
539
+
540
+
541
+ <div id="uploadPreview" class="upload-preview" aria-live="polite"></div>
542
+ </div>
543
+ </section>
544
+
545
+ <section class="card">
546
+ <h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
547
+ <div class="live">
548
+ <div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription
549
+ </div>
550
+ </div>
551
+ </section>
552
+ </main>
553
+
554
+ <aside class="sidebar">
555
+ <h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
556
+ <label for="mic">Microphone Device</label>
557
+ <select id="mic" disabled>
558
+ <option value="1" selected>Default Microphone (#1)</option>
559
+ </select>
560
+ <label for="sys">System Audio (Optional)</label>
561
+ <select id="sys" disabled>
562
+ <option value="16" selected>System Loopback (#16)</option>
563
+ </select>
564
+ <label for="chunk_secs">Chunk Length (seconds)</label>
565
+ <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
566
+ <label for="model">Transcription Model</label>
567
+ <select id="model" disabled>
568
+ <option value="small">Small (Fast)</option>
569
+ <option value="medium" selected>Medium (Balanced)</option>
570
+ <option value="large">Large (Accurate)</option>
571
+ </select>
572
+ <!-- <div class="toggle-row">
573
+ <span class="toggle-label">Disable Transcription</span>
574
+ <label class="toggle-switch">
575
+ <input type="checkbox" id="no_transcribe">
576
+ <span class="toggle-slider"></span>
577
+ </label>
578
+ </div> -->
579
+ <div class="status" id="status"></div>
580
+ </aside>
581
+ </div>
582
+ <script>
583
+ (function () {
584
+ const uploadEl = document.getElementById('uploadFile');
585
+ const preview = document.getElementById('uploadPreview');
586
+ const live = document.getElementById('live');
587
+ const statusEl = document.getElementById('status');
588
+ let audioEl = null;
589
+ let es = null; // EventSource
590
+ let playing = false;
591
+ let currentFile = null;
592
+
593
// Upload one audio file to /api/upload as multipart form data.
//
// Always resolves (never rejects) so callers can simply branch on
// `success`: the parsed JSON body when the server sent one, otherwise
// `{ success: false, error }` describing the transport or HTTP failure.
//
// file: the File chosen in the hidden file input.
// Returns a Promise of the parsed response or a failure descriptor.
async function uploadFile(file) {
    const fd = new FormData();
    fd.append('file', file);

    let resp;
    try {
        resp = await fetch('/api/upload', { method: 'POST', body: fd });
    } catch (err) {
        // fetch() only rejects on network-level failures (server down,
        // connection reset, ...), so report that as a failed upload.
        return { success: false, error: err && err.message ? err.message : 'network error' };
    }

    try {
        return await resp.json();
    } catch (err) {
        // Body was not JSON, e.g. an HTML error page for a 413/500 response.
        return { success: false, error: 'HTTP ' + resp.status };
    }
}
599
+
600
// Rebuild the upload preview: a filename label above an <audio> player for
// the uploaded file. The player's play/pause events drive transcription
// (onPlay / onPause), and the new element becomes the current `audioEl`.
//
// url:      URL the uploaded audio can be streamed from.
// filename: server-side filename; stored on the element so onPlay can
//           tell the backend which file to transcribe.
function createAudioPlayer(url, filename) {
    // Drop whatever preview was shown for a previous upload.
    preview.innerHTML = '';

    const label = document.createElement('div');
    label.className = 'filename';
    label.textContent = filename;

    const player = document.createElement('audio');
    player.controls = true;
    player.src = url;
    player.id = 'uploadedAudio';
    player.style.width = '100%';
    player.dataset.filename = filename;
    player.addEventListener('play', onPlay);
    player.addEventListener('pause', onPause);

    // Centered column wrapper: label first, player underneath.
    const wrapper = document.createElement('div');
    Object.assign(wrapper.style, {
        width: '100%',
        display: 'flex',
        flexDirection: 'column',
        alignItems: 'center',
    });
    wrapper.append(label, player);
    preview.appendChild(wrapper);

    audioEl = player;
}
628
+
629
// 'play' handler for the uploaded audio: asks the backend to start
// transcribing the current file, then subscribes to /events (SSE) and
// appends every received line to the live transcript.
//
// Does nothing when there is no player or a session is already active.
async function onPlay() {
    if (!audioEl || playing) return;
    playing = true;
    currentFile = audioEl.dataset.filename;

    // update UI
    statusEl.textContent = "▶️ Transcribing...";
    statusEl.style.color = "var(--success)";

    try {
        const resp = await fetch('/api/start-transcribe-file', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ filename: currentFile })
        });
        if (!resp.ok) {
            // Still continue: the backend may already be running; the SSE
            // stream and the status poll will show the real state.
            console.warn('start-transcribe-file returned HTTP ' + resp.status);
        }
    } catch (e) {
        // Network failure: the backend never got the request, so do not
        // pretend a session is running. The next 'play' event retries.
        console.error('start failed', e);
        playing = false;
        statusEl.textContent = "⚠️ Could not start transcription";
        statusEl.style.color = "var(--danger)";
        return;
    }

    // The user may have paused while the request was in flight; onPause has
    // then already stopped the session, so do not reopen the stream.
    if (!playing) return;

    // open SSE for transcription
    if (es) es.close();
    es = new EventSource('/events');
    es.onmessage = function (ev) {
        const line = ev.data;
        if (!line) return;
        appendLine(line);
    };
    es.onerror = (e) => {
        console.warn('SSE error', e);
    };
}
660
+
661
// 'pause' handler for the uploaded audio: marks the session as stopped,
// asks the backend to stop via POST /stop, and closes the SSE stream.
//
// Does nothing unless a player exists and a session is active.
async function onPause() {
    if (!(audioEl && playing)) {
        return;
    }
    playing = false;

    statusEl.textContent = "⏸️ Stopped";
    statusEl.style.color = "var(--danger)";

    // A failed stop request is only logged; the stream is closed regardless.
    await fetch('/stop', { method: 'POST' }).catch((e) => {
        console.error('stop failed', e);
    });

    if (es) {
        es.close();
        es = null;
    }
}
678
+
679
+ // function appendLine(s) {
680
+ // const chunk = document.createElement('div');
681
+ // chunk.className = 'chunk';
682
+ // chunk.textContent = s;
683
+ // live.appendChild(chunk);
684
+ // live.scrollTop = live.scrollHeight;
685
+ // }
686
// Append one transcript line to the live panel and keep it scrolled to the
// newest entry.
//
// Lines shaped like "[file.wav] 0.00-3.00 Speaker A: Hello world" are shown
// as a bold speaker label followed by the time range and text; anything
// else is shown verbatim.
//
// s: one line of text received from the /events stream.
function appendLine(s) {
    const chunk = document.createElement('div');
    chunk.className = 'chunk';

    const m = s.match(/^\[(.*?)\]\s+([\d.]+)-([\d.]+)\s+Speaker\s+(\S+):\s+(.*)$/);
    if (m) {
        // m[1] (the file name) is intentionally not displayed.
        const [, , start, end, speaker, text] = m;

        // Build nodes instead of assigning innerHTML: the speaker label and
        // transcript text come from the server and must never be parsed as
        // markup.
        const who = document.createElement('b');
        who.textContent = speaker;
        chunk.append(who, ` [${start}-${end}s]: ${text}`);
    } else {
        chunk.textContent = s;
    }

    live.appendChild(chunk);
    live.scrollTop = live.scrollHeight;
}
702
+
703
// Poll /status every few seconds (optional, keeps sidebar updated).
//
// Shows "Running" while the backend reports `running`, and "Idle" when it
// does not and no local playback session is active. Re-arms itself with
// setTimeout after every attempt, successful or not.
async function pollStatus() {
    try {
        const r = await fetch('/status');
        const js = await r.json();
        if (js.running) {
            statusEl.textContent = "▶️ Running";
            statusEl.style.color = "var(--success)";
        } else if (!playing) {
            statusEl.textContent = "⏸️ Idle";
            statusEl.style.color = "var(--text-muted)";
        }
    } catch (e) {
        // Best-effort: a failed poll must not stop the loop, but leave a
        // trace so an unreachable backend is visible while debugging.
        console.debug('status poll failed', e);
    }
    setTimeout(pollStatus, 3000);
}
pollStatus();
719
+
720
// When the user picks a file: upload it, then show an audio player for it
// and prompt the user to press play. Any failure is reported with an alert.
uploadEl.addEventListener('change', async (ev) => {
    const file = ev.target.files && ev.target.files[0];
    if (!file) return;

    let res = null;
    let failure = null;
    try {
        res = await uploadFile(file);
    } catch (e) {
        // A rejected upload (network error, unparsable response) would
        // otherwise vanish as an unhandled rejection with no feedback.
        console.error('upload failed', e);
        failure = e && e.message ? e.message : String(e);
    }

    if (res && res.success) {
        createAudioPlayer(res.url, res.filename);
        live.innerHTML = '<div style="color:#bcbcbc;">Ready. Play audio to start live transcription.</div>';
    } else {
        alert('Upload failed: ' + (failure || (res && res.error ? res.error : 'unknown')));
    }
});
732
+ })();
733
+ </script>
734
+ </body>
735
+
736
+ </html>
templates/landing.html ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>LiveTranscribe — Real-time Audio Transcription</title>
7
+ <meta name="description" content="LiveTranscribe: real-time microphone + uploaded audio transcription with speaker diarization and downloadable transcripts." />
8
+ <!-- Tailwind CDN for quick styling -->
9
+ <script src="https://cdn.tailwindcss.com"></script>
10
+ <style>
11
+ /* small extra tweaks */
12
+ .feature-icon { width:48px; height:48px; }
13
+ </style>
14
+ </head>
15
+ <body class="bg-gray-50 text-gray-800 font-sans">
16
+ <header class="bg-white shadow">
17
+ <div class="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
18
+ <div class="flex items-center gap-3">
19
+ <div class="w-10 h-10 rounded-lg bg-gradient-to-tr from-indigo-500 to-purple-500 flex items-center justify-center text-white font-bold">LT</div>
20
+ <div>
21
+ <a href="#home" class="text-xl font-semibold">LiveTranscribe</a>
22
+ <div class="text-sm text-gray-500">Real-time transcription + diarization</div>
23
+ </div>
24
+ </div>
25
+ <nav class="hidden md:flex items-center gap-6 text-sm">
26
+ <a href="#features" class="hover:underline">Features</a>
27
+ <a href="#how" class="hover:underline">How it works</a>
28
+ <a href="#deploy" class="hover:underline">Deploy</a>
29
+ <a href="#try" class="px-4 py-2 rounded-lg bg-indigo-600 text-white">Get started</a>
30
+ </nav>
31
+ <div class="md:hidden">
32
+ <button id="menuBtn" class="px-3 py-2 bg-gray-100 rounded">Menu</button>
33
+ </div>
34
+ </div>
35
+ </header>
36
+
37
+ <main id="home" class="max-w-6xl mx-auto px-6 py-12">
38
+ <section class="grid md:grid-cols-2 gap-10 items-center">
39
+ <div>
40
+ <h1 class="text-4xl md:text-5xl font-extrabold leading-tight">Real-time transcription for live and uploaded audio</h1>
41
+ <p class="mt-4 text-gray-600">Record, chunk, diarize speakers and transcribe — all in real time. Useful for meetings, podcasts, lectures, and interviews.</p>
42
+
43
+ <div class="mt-6 flex gap-3">
44
+ <a href="/live" class="px-5 py-3 rounded-lg bg-indigo-600 text-white shadow">Open Live Recorder</a>
45
+ <a href="/upload" class="px-5 py-3 rounded-lg border border-gray-200">Upload & Live Transcribe</a>
46
+ </div>
47
+
48
+ <div class="mt-6 text-sm text-gray-500">
49
+ <strong>Note:</strong> The links above assume your Flask app serves the live recorder at /live and the upload page at /upload. Adjust them if your routes differ.
50
+ </div>
51
+ </div>
52
+
53
+ <!-- <div class="bg-white rounded-lg shadow p-6">
54
+ <h3 class="font-semibold">Quick demo</h3>
55
+ <ol class="mt-3 list-decimal list-inside text-gray-600 text-sm space-y-2">
56
+ <li>Open <code>/index2.html</code> to start the live recorder and see live transcription + diarization.</li>
57
+ <li>Open <code>/index2_upload.html</code> to upload an audio file and receive a streaming transcript.</li>
58
+ <li>Download transcripts or copy them from the web UI.</li>
59
+ </ol>
60
+ </div> -->
61
+ </section>
62
+
63
+ <section id="features" class="mt-14">
64
+ <h2 class="text-2xl font-bold">Features</h2>
65
+ <div class="grid md:grid-cols-3 gap-6 mt-6">
66
+ <article class="bg-white p-6 rounded-lg shadow-sm">
67
+ <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%235a67d8' viewBox='0 0 24 24'><path d='M12 3v10c0 3.31-2.69 6-6 6H4v2h8v-2h-2c2.21 0 4-1.79 4-4V3h-6z'/></svg>" alt="microphone" />
68
+ <h4 class="mt-3 font-semibold">Live recording</h4>
69
+ <p class="text-sm text-gray-600 mt-2">Capture mic & system audio in short chunks (5s) and stream them to the transcriber.</p>
70
+ </article>
71
+
72
+ <article class="bg-white p-6 rounded-lg shadow-sm">
73
+ <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%2337a6ff' viewBox='0 0 24 24'><path d='M12 2C8 2 5 5 5 9c0 4.42 7 13 7 13s7-8.58 7-13c0-4-3-7-7-7z'/></svg>" alt="upload" />
74
+ <h4 class="mt-3 font-semibold">Upload + stream</h4>
75
+ <p class="text-sm text-gray-600 mt-2">Users upload audio files and get live, incremental transcripts back in the browser.</p>
76
+ </article>
77
+
78
+ <article class="bg-white p-6 rounded-lg shadow-sm">
79
+ <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%23f59e0b' viewBox='0 0 24 24'><path d='M12 12c2.21 0 4-1.79 4-4V4h-8v4c0 2.21 1.79 4 4 4z'/></svg>" alt="diarization" />
80
+ <h4 class="mt-3 font-semibold">Speaker diarization</h4>
81
+ <p class="text-sm text-gray-600 mt-2">Differentiate speakers in meetings so transcripts label speakers/segments automatically.</p>
82
+ </article>
83
+ </div>
84
+ </section>
85
+
86
+ <section id="how" class="mt-14">
87
+ <h2 class="text-2xl font-bold">How it works</h2>
88
+ <div class="mt-4 grid md:grid-cols-3 gap-6">
89
+ <div class="col-span-2 bg-white p-6 rounded-lg shadow-sm">
90
+ <h3 class="font-semibold">Architecture (high level)</h3>
91
+ <ol class="list-decimal list-inside text-gray-600 mt-3 space-y-2 text-sm">
92
+ <li>Client captures audio (microphone or upload) and sends 5s chunks to the server via WebSocket / fetch.</li>
93
+ <li>Server stores, optionally concatenates chunks and runs an ASR model (e.g., Whisper, faster-whisper) in streaming mode.</li>
94
+ <li>Speaker diarization runs (pyannote) to assign speaker labels to segments.</li>
95
+ <li>Transcribed segments + speaker labels are pushed to the client and shown live.</li>
96
+ <li>User can download the full transcript or copy segments.</li>
97
+ </ol>
98
+ </div>
99
+
100
+ <div class="bg-white p-6 rounded-lg shadow-sm">
101
+ <h3 class="font-semibold">Quick tips</h3>
102
+ <ul class="text-sm text-gray-600 mt-3 list-disc list-inside space-y-2">
103
+ <li>Tune chunk-length (5s is a good start) for latency vs accuracy tradeoff.</li>
104
+ <li>Run diarization asynchronously to avoid blocking transcription if you need lower latency.</li>
105
+ <li>Provide a "finalize" button so the server can merge chunks and run a final pass for improved accuracy.</li>
106
+ </ul>
107
+ </div>
108
+ </div>
109
+ </section>
110
+
111
+ <section id="deploy" class="mt-14">
112
+ <h2 class="text-2xl font-bold">Deploy & run</h2>
113
+ <div class="mt-4 bg-white p-6 rounded-lg shadow-sm text-sm text-gray-700">
114
+ <p>Typical steps to run locally or on a VM:</p>
115
+ <pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto"># 1. create venv
116
+ python -m venv .venv
117
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
118
+
119
+ # 2. install requirements
120
+ pip install -r requirements.txt
121
+
122
+ # 3. run app(s)
123
+ python app.py # live recorder
124
+ python app2.py # upload-based transcription
125
+
126
+ # 4. open in browser
127
+ http://localhost:5000/index2.html
128
+ http://localhost:5000/index2_upload.html
129
+ </pre>
130
+ <p class="mt-3">If you want a single Flask app with a landing page route, add this snippet to your Flask app:</p>
131
+ <pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto">@app.route('/')
132
+ def landing():
133
+ return render_template('landing.html')
134
+ </pre>
135
+ </div>
136
+ </section>
137
+
138
+ <section id="try" class="mt-14">
139
+ <h2 class="text-2xl font-bold">Try it now</h2>
140
+ <div class="mt-4 flex flex-col md:flex-row gap-4">
141
+ <a href="/live" class="px-4 py-3 rounded-lg bg-indigo-600 text-white">Open Live Recorder</a>
142
+ <a href="/upload" class="px-4 py-3 rounded-lg border">Upload & Transcribe</a>
143
+ <a href="#deploy" class="px-4 py-3 rounded-lg border">Deployment instructions</a>
144
+ </div>
145
+ </section>
146
+
147
+ <!-- <footer class="mt-16 text-center text-sm text-gray-500 pb-8">
148
+ Built with ❤️ — Add your logo, links, and analytics here.
149
+ </footer> -->
150
+ </main>
151
+
152
+ <script>
153
// Mobile "Menu" button: there is no collapsible menu yet, so a click just
// points the user at the in-page sections.
const menuBtn = document.getElementById('menuBtn');
if (menuBtn) {
    menuBtn.addEventListener('click', function () {
        alert('Use the links on the page: Features, How it works, Deploy, Try it now');
    });
}
158
+ </script>
159
+ </body>
160
+ </html>
templates/test_index.html ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <title>Audio Recorder & Transcription UI</title>
7
+ <style>
8
+ body {
9
+ font-family: 'Segoe UI', Arial, sans-serif;
10
+ background: linear-gradient(120deg, #f5f6fa 60%, #dbeafe 100%);
11
+ margin: 0;
12
+ padding: 0;
13
+ }
14
+
15
+ .container {
16
+ max-width: 750px;
17
+ margin: 40px auto;
18
+ background: #fff;
19
+ border-radius: 14px;
20
+ box-shadow: 0 4px 24px #0002;
21
+ padding: 32px 32px 24px 32px;
22
+ }
23
+
24
+ h1 {
25
+ margin-top: 0;
26
+ font-size: 2.2em;
27
+ letter-spacing: 1px;
28
+ color: #2563eb;
29
+ text-align: center;
30
+ }
31
+
32
+ label {
33
+ display: block;
34
+ margin-top: 18px;
35
+ font-weight: 600;
36
+ color: #334155;
37
+ }
38
+
39
+ select,
40
+ input[type="number"] {
41
+ margin-top: 6px;
42
+ padding: 8px;
43
+ font-size: 1em;
44
+ border-radius: 6px;
45
+ border: 1px solid #cbd5e1;
46
+ background: #f1f5f9;
47
+ width: 100%;
48
+ box-sizing: border-box;
49
+ }
50
+
51
+ button {
52
+ margin-top: 12px;
53
+ margin-right: 10px;
54
+ padding: 10px 22px;
55
+ font-size: 1em;
56
+ font-weight: 600;
57
+ border: none;
58
+ border-radius: 6px;
59
+ background: #2563eb;
60
+ color: #fff;
61
+ cursor: pointer;
62
+ transition: background 0.2s;
63
+ }
64
+
65
+ button:disabled {
66
+ background: #94a3b8;
67
+ cursor: not-allowed;
68
+ }
69
+
70
+ .stop-btn {
71
+ background: #dc2626;
72
+ }
73
+
74
+ .status {
75
+ margin-top: 18px;
76
+ font-weight: bold;
77
+ color: #0ea5e9;
78
+ text-align: center;
79
+ font-size: 1.1em;
80
+ }
81
+
82
+ .live {
83
+ margin-top: 32px;
84
+ background: #f1f5f9;
85
+ border-radius: 8px;
86
+ padding: 18px 18px 10px 18px;
87
+ }
88
+
89
+ .live h2 {
90
+ margin-top: 0;
91
+ color: #0ea5e9;
92
+ font-size: 1.2em;
93
+ }
94
+
95
+ .chunk {
96
+ background: #e0e7ef;
97
+ margin-bottom: 8px;
98
+ padding: 8px 12px;
99
+ border-radius: 5px;
100
+ font-size: 1em;
101
+ color: #334155;
102
+ box-shadow: 0 1px 2px #0001;
103
+ }
104
+
105
+ .files {
106
+ margin-top: 32px;
107
+ background: #f1f5f9;
108
+ border-radius: 8px;
109
+ padding: 18px 18px 10px 18px;
110
+ }
111
+
112
+ .files h2 {
113
+ margin-top: 0;
114
+ color: #2563eb;
115
+ font-size: 1.2em;
116
+ }
117
+
118
+ .file {
119
+ background: #e0e7ef;
120
+ margin-bottom: 8px;
121
+ padding: 8px 12px;
122
+ border-radius: 5px;
123
+ font-size: 1em;
124
+ color: #334155;
125
+ display: flex;
126
+ align-items: center;
127
+ justify-content: space-between;
128
+ box-shadow: 0 1px 2px #0001;
129
+ }
130
+
131
+ .file a {
132
+ color: #2563eb;
133
+ text-decoration: none;
134
+ font-weight: 500;
135
+ }
136
+
137
+ .file a:hover {
138
+ text-decoration: underline;
139
+ }
140
+
141
+ .footer {
142
+ margin-top: 36px;
143
+ text-align: center;
144
+ color: #64748b;
145
+ font-size: 0.95em;
146
+ }
147
+
148
+ @media (max-width: 600px) {
149
+ .container {
150
+ padding: 12px 4vw 12px 4vw;
151
+ }
152
+ }
153
+ </style>
154
+ </head>
155
+
156
+ <body>
157
+ <div class="container">
158
+ <h1>Audio Recorder & Transcription</h1>
159
+ <div>
160
+ <label for="mic">Microphone Device</label>
161
+ <select id="mic" disabled>
162
+ <option value="1" selected>Microphone Device (#1)</option>
163
+ </select>
164
+
165
+ <label for="sys">System/Loopback Device (optional)</label>
166
+ <select id="sys" disabled>
167
+ <option value="16" selected>System Loopback Device (#16)</option>
168
+ </select>
169
+
170
+ <label for="chunk_secs">Chunk Length (seconds)</label>
171
+ <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
172
+
173
+ <label for="model">Transcription Model</label>
174
+ <select id="model" disabled>
175
+ <option value="small">small</option>
176
+ <option value="medium" selected>medium</option>
177
+ <option value="large">large</option>
178
+ </select>
179
+
180
+ <div style="margin-top:18px; text-align:center;">
181
+ <button id="startBtn">Start Recording</button>
182
+ <button id="stopBtn" class="stop-btn" disabled>Stop Recording</button>
183
+ </div>
184
+ </div>
185
+
186
+ <div class="status" id="status"></div>
187
+
188
+ <div class="live">
189
+ <h2>Live Transcription</h2>
190
+ <div id="live"></div>
191
+ </div>
192
+
193
+ <div class="files">
194
+ <h2>Final Files</h2>
195
+ <div id="files"></div>
196
+ </div>
197
+ <div class="footer">
198
+ &copy; 2025 Audio Multi-Transcript UI &middot; Powered by Flask + PyAudio + Whisper
199
+ </div>
200
+ </div>
201
+
202
+ <script>
203
+ // --- Start/Stop Recording ---
204
+ let polling = null;
205
// Start button: POST the (currently hard-coded) recording settings to
// /api/start-recording, then switch the buttons and begin status polling.
document.getElementById('startBtn').onclick = async function () {
    const startBtn = document.getElementById('startBtn');
    const stopBtn = document.getElementById('stopBtn');
    const statusEl = document.getElementById('status');

    // Static values, mirroring the disabled/read-only form controls.
    const mic = 1;
    const sys = 16;
    const chunk_secs = 5;
    const model = "medium";
    const no_transcribe = false;

    // Disable immediately so a double click cannot send two start requests.
    startBtn.disabled = true;
    statusEl.textContent = 'Starting...';

    try {
        const res = await fetch('/api/start-recording', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
        });
        if (!res.ok) {
            // Keep going: polling reports the real state and resets the
            // buttons itself if nothing is recording.
            console.warn('start-recording returned HTTP ' + res.status);
        }
    } catch (e) {
        // Network failure: nothing was started, so restore the idle UI
        // instead of leaving the status stuck on "Starting...".
        console.error('start-recording failed', e);
        statusEl.textContent = 'Failed to start recording.';
        startBtn.disabled = false;
        return;
    }

    stopBtn.disabled = false;
    pollStatus();
};
221
+
222
// Stop button: POST /api/stop-recording, halt status polling, and two
// seconds later refresh the file list and re-enable the start button.
document.getElementById('stopBtn').onclick = async function () {
    await fetch('/api/stop-recording', { method: 'POST' });

    const stopButton = document.getElementById('stopBtn');
    document.getElementById('status').textContent = 'Stopping...';
    stopButton.disabled = true;

    if (polling) {
        clearInterval(polling);
    }

    // Runs after the delay below.
    function refreshAfterStop() {
        loadFiles();
        document.getElementById('startBtn').disabled = false;
    }
    setTimeout(refreshAfterStop, 2000);
};
229
+
230
+ // --- Poll status ---
231
// Poll /api/recording-status once per second while recording.
//
// Each tick updates the status label and re-renders the last ten live
// segments. When the server reports that recording has ended, polling
// stops, the buttons return to their idle state, and the file list is
// reloaded. The interval id is stored in `polling` so the stop handler can
// cancel it.
function pollStatus() {
    // Never stack a second interval on top of a running one.
    if (polling) clearInterval(polling);

    const timer = setInterval(async () => {
        let data;
        try {
            const res = await fetch('/api/recording-status');
            data = await res.json();
        } catch (e) {
            // Transient failure: skip this tick and try again on the next.
            console.warn('recording-status poll failed', e);
            return;
        }
        if (!data) return;

        document.getElementById('status').textContent = data.recording ? 'Recording...' : 'Idle';

        // --- Show live transcription ---
        const liveDiv = document.getElementById('live');
        liveDiv.innerHTML = '';
        if (data.live_segments && data.live_segments.length) {
            data.live_segments.slice(-10).forEach(seg => {
                const div = document.createElement('div');
                div.className = 'chunk';

                // Build nodes instead of assigning innerHTML: speaker and
                // text come from the server and must not be parsed as markup.
                const who = document.createElement('b');
                who.textContent = `${seg.speaker || 'Speaker'}:`;
                div.append(who, ` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.text}`);

                liveDiv.appendChild(div);
            });
        } else {
            liveDiv.textContent = 'No transcription yet.';
        }

        if (!data.recording) {
            // Clear this tick's own interval; `polling` may already refer
            // to a newer one.
            clearInterval(timer);
            document.getElementById('startBtn').disabled = false;
            document.getElementById('stopBtn').disabled = true;
            loadFiles();
        }
    }, 1000);
    polling = timer;
}
259
+ // Helper to format time
260
// Format a duration in seconds as "m:ss" (minutes are not capped at 59).
// A null or undefined input yields "0:00".
function formatTime(s) {
    if (s === null || s === undefined) {
        return "0:00";
    }
    const minutes = Math.floor(s / 60);
    const seconds = String(Math.floor(s % 60)).padStart(2, "0");
    return minutes + ":" + seconds;
}
266
+ // --- Load final files ---
267
// Fetch /api/final-files and render one row per file (name plus a
// "Download" link) into #files. Shows "No files yet." for an empty list and
// "Error loading files." when the request or rendering fails.
async function loadFiles() {
    const filesDiv = document.getElementById('files');
    filesDiv.innerHTML = '';
    try {
        const res = await fetch('/api/final-files');
        const data = await res.json();
        if (!data.files.length) {
            filesDiv.textContent = 'No files yet.';
            return;
        }

        // Build all rows off-DOM first and swap them in in one step, so two
        // overlapping calls cannot both append and duplicate the list.
        const rows = document.createDocumentFragment();
        data.files.forEach(f => {
            const div = document.createElement('div');
            div.className = 'file';

            // DOM properties instead of innerHTML: file names and URLs come
            // from the server and must not be parsed as markup.
            const label = document.createElement('span');
            label.textContent = f.name;

            const link = document.createElement('a');
            link.href = f.url || f.path;
            link.target = '_blank';
            link.rel = 'noopener'; // do not expose window.opener to the new tab
            link.textContent = 'Download';

            div.append(label, ' ', link);
            rows.appendChild(div);
        });
        filesDiv.innerHTML = '';
        filesDiv.appendChild(rows);
    } catch (e) {
        console.error('loadFiles failed', e);
        filesDiv.textContent = 'Error loading files.';
    }
}

// --- On load ---
loadFiles();
290
+ </script>
291
+ </body>
292
+ </html>
templates/test_index3.html ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Audio Transcription Studio</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
11
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
12
+ <style>
13
+ body {
14
+ font-family: 'Inter', sans-serif;
15
+ background-color: #1a1a2e; /* Dark purple background */
16
+ color: #ffffff;
17
+ }
18
+
19
+ .container-bg {
20
+ background-color: #2c2c44; /* Slightly lighter purple for containers */
21
+ }
22
+
23
+ .panel-bg {
24
+ background-color: #22223b; /* Darker panel background */
25
+ }
26
+
27
+ .input-field {
28
+ background-color: #3b3b55;
29
+ border: 1px solid #4a4a6b;
30
+ color: #e0e0e0;
31
+ }
32
+
33
+ .button-glow {
34
+ box-shadow: 0 0 10px 2px #6a1b9a;
35
+ }
36
+
37
+ .glow-text {
38
+ text-shadow: 0 0 8px #d1c4e9;
39
+ }
40
+ </style>
41
+ </head>
42
+ <body class="flex items-center justify-center min-h-screen p-8">
43
+ <div class="w-full max-w-6xl">
44
+ <!-- Main Header -->
45
+ <header class="text-center mb-10">
46
+ <h1 class="text-5xl font-extrabold text-[#d1c4e9] glow-text mb-2">Audio Transcription Studio</h1>
47
+ <p class="text-lg text-gray-400">Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.</p>
48
+ </header>
49
+
50
+ <!-- Main Content Grid -->
51
+ <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
52
+ <!-- Left Panel: Live Transcription -->
53
+ <div class="lg:col-span-2 panel-bg p-8 rounded-2xl shadow-xl">
54
+ <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-file-alt mr-2"></i> Live Transcription</h2>
55
+
56
+ <!-- Recording Status & Button -->
57
+ <div id="recording-status-area" class="flex flex-col items-center justify-center p-6 mb-8">
58
+ <div id="status-spinner" class="relative w-32 h-32 hidden">
59
+ <div class="absolute inset-0 border-4 border-purple-500 rounded-full animate-ping"></div>
60
+ <div class="absolute inset-4 border-4 border-purple-400 rounded-full animate-ping delay-200"></div>
61
+ <div class="absolute inset-8 border-4 border-purple-300 rounded-full animate-ping delay-400"></div>
62
+ <div class="flex items-center justify-center h-full w-full">
63
+ <i class="fas fa-microphone text-4xl text-white"></i>
64
+ </div>
65
+ </div>
66
+ <div id="status-icon" class="relative w-32 h-32 flex items-center justify-center bg-purple-600 rounded-full">
67
+ <i class="fas fa-microphone text-4xl text-white"></i>
68
+ </div>
69
+ <p id="status-text" class="mt-4 text-green-400 font-semibold text-lg">Ready to record</p>
70
+ <div id="start-stop-buttons" class="mt-4">
71
+ <button id="start-btn" class="bg-purple-600 hover:bg-purple-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 button-glow disabled:opacity-50 disabled:cursor-not-allowed">
72
+ Start Recording
73
+ </button>
74
+ <button id="stop-btn" class="bg-red-600 hover:bg-red-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 disabled:opacity-50 disabled:cursor-not-allowed hidden">
75
+ Stop Recording
76
+ </button>
77
+ </div>
78
+ </div>
79
+
80
+ <!-- Live Transcription Display -->
81
+ <div id="live-transcription" class="bg-[#1b1b2a] p-6 rounded-lg h-96 overflow-y-auto border border-[#3b3b55]">
82
+ <p class="text-gray-400 text-center text-lg mt-12">Start recording to see live transcription</p>
83
+ </div>
84
+ </div>
85
+
86
+ <!-- Right Panel: Recording Settings & Files -->
87
+ <div class="lg:col-span-1 space-y-8">
88
+ <!-- Recording Settings Panel -->
89
+ <div class="panel-bg p-8 rounded-2xl shadow-xl">
90
+ <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-cogs mr-2"></i> Recording Settings</h2>
91
+ <div class="space-y-6">
92
+ <!-- Microphone Device -->
93
+ <div>
94
+ <label for="mic-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-microphone mr-2"></i>Microphone Device</label>
95
+ <select id="mic-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
96
+ <option value="">Loading devices...</option>
97
+ </select>
98
+ </div>
99
+
100
+ <!-- System Audio -->
101
+ <div>
102
+ <label for="sys-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-desktop mr-2"></i>System Audio (Optional)</label>
103
+ <select id="sys-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
104
+ <option value="null">None</option>
105
+ </select>
106
+ </div>
107
+
108
+ <!-- Chunk Length -->
109
+ <div>
110
+ <label for="chunk-secs-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-clock mr-2"></i>Chunk Length (seconds)</label>
111
+ <input type="number" id="chunk-secs-input" value="5" min="1" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
112
+ </div>
113
+
114
+ <!-- Transcription Model -->
115
+ <div>
116
+ <label for="model-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-brain mr-2"></i>Transcription Model</label>
117
+ <select id="model-input" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
118
+ <option value="medium">Medium (Balanced)</option>
119
+ <option value="small">Small</option>
120
+ <option value="large">Large</option>
121
+ </select>
122
+ </div>
123
+
124
+ <!-- Disable Transcription Toggle -->
125
+ <div class="flex items-center">
126
+ <input id="no-transcribe-checkbox" type="checkbox" class="h-5 w-5 text-purple-600 focus:ring-purple-500 rounded border-gray-600 bg-gray-700">
127
+ <label for="no-transcribe-checkbox" class="ml-2 block text-sm text-gray-300">Disable Transcription</label>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+ <!-- Recording Files Panel -->
133
+ <div class="panel-bg p-8 rounded-2xl shadow-xl">
134
+ <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-folder-open mr-2"></i> Recording Files</h2>
135
+ <div id="final-files-list" class="space-y-2 text-gray-300">
136
+ <p class="text-gray-500">No files yet...</p>
137
+ </div>
138
+ </div>
139
+ </div>
140
+ </div>
141
+ </div>
142
+
143
+ <script>
144
// Page elements used by the script, looked up once by id. The two lists
// below are positional: the n-th name is bound to the n-th id.
const [
    micSelect,
    sysSelect,
    chunkSecsInput,
    modelInput,
    noTranscribeCheckbox,
    startBtn,
    stopBtn,
    statusText,
    liveTranscription,
    finalFilesList,
    statusIcon,
    statusSpinner,
] = [
    'mic-select',
    'sys-select',
    'chunk-secs-input',
    'model-input',
    'no-transcribe-checkbox',
    'start-btn',
    'stop-btn',
    'status-text',
    'live-transcription',
    'final-files-list',
    'status-icon',
    'status-spinner',
].map((id) => document.getElementById(id));

// Handle of the status-polling interval.
let statusPollingInterval;
158
+
159
// Fetch available audio devices from /api/devices and populate both
// dropdowns. The microphone list holds every device (first one selected);
// the system-audio list holds the same devices after a leading "None"
// entry whose value is the string "null". On failure both dropdowns show a
// single "Error loading devices" entry.
async function fetchDevices() {
    // Replace all options of `select` with the given <option> elements.
    const fill = (select, options) => {
        select.innerHTML = '';
        options.forEach((option) => select.appendChild(option));
    };
    // Build options with the Option constructor rather than an HTML string,
    // so a device name can never be parsed as markup.
    const toOption = (device) => new Option(device.name, device.index);

    try {
        const response = await fetch('/api/devices');
        const data = await response.json();

        fill(micSelect, data.devices.map(toOption));
        fill(sysSelect, [new Option('None', 'null'), ...data.devices.map(toOption)]);

        if (data.devices.length > 0) {
            micSelect.value = data.devices[0].index;
        }
    } catch (error) {
        console.error('Error fetching devices:', error);
        fill(micSelect, [new Option('Error loading devices')]);
        fill(sysSelect, [new Option('Error loading devices')]);
    }
}
180
+
181
// Fetch final files and display them
/**
 * Load the list of finished recordings from `/api/final-files` and render
 * it into the "Recording Files" panel.
 *
 * Each entry becomes a link that opens `file.url` in a new tab and shows
 * `file.name`. The link is assembled from DOM nodes and the name is set via
 * `textContent`, so file names are displayed literally instead of being
 * parsed as HTML.
 *
 * An empty list shows the "No files yet..." placeholder; any failure
 * (network error, non-2xx response, malformed payload) shows
 * "Error loading files.".
 *
 * @returns {Promise<void>}
 */
async function fetchFinalFiles() {
    // Replace the panel content with a single styled message paragraph.
    const showMessage = (text, className) => {
        const paragraph = document.createElement('p');
        paragraph.className = className;
        paragraph.textContent = text;
        finalFilesList.replaceChildren(paragraph);
    };

    try {
        const response = await fetch('/api/final-files');
        if (!response.ok) {
            throw new Error(`File list request failed with HTTP ${response.status}`);
        }

        const data = await response.json();
        if (!Array.isArray(data.files)) {
            throw new TypeError('File list response has no "files" array');
        }

        if (data.files.length === 0) {
            showMessage('No files yet...', 'text-gray-500');
            return;
        }

        const links = data.files.map(file => {
            const link = document.createElement('a');
            link.href = file.url;
            link.className = 'flex items-center text-purple-400 hover:text-purple-300 transition-colors duration-200';
            link.target = '_blank';

            const icon = document.createElement('i');
            icon.className = 'fas fa-file-waveform mr-2';

            const label = document.createElement('span');
            label.textContent = file.name;

            link.append(icon, label);
            return link;
        });
        finalFilesList.replaceChildren(...links);
    } catch (error) {
        console.error('Error fetching final files:', error);
        showMessage('Error loading files.', 'text-red-500');
    }
}
200
+ }
201
+
202
// Poll the server for recording status and live segments
/**
 * Start polling `/api/recording-status` once per second and mirror the
 * result in the UI.
 *
 * While the server reports `recording`, the status line shows
 * "Recording...", the spinner is visible, the live transcription area is
 * re-rendered from `live_segments`, and the file list is refreshed.
 * As soon as the server reports that recording has ended, the UI returns
 * to "Ready to record", the Start button is shown again, the file list is
 * refreshed one last time and polling stops.
 *
 * Speaker labels and transcribed text are inserted as text nodes, never as
 * HTML, so transcription content cannot inject markup into the page.
 *
 * If a poll fails, the error is logged and polling stops.
 */
function startStatusPolling() {
    // Calling this twice must not leave an earlier timer running unnoticed;
    // clearInterval is a no-op when no timer has been set yet.
    clearInterval(statusPollingInterval);

    statusPollingInterval = setInterval(async () => {
        try {
            const response = await fetch('/api/recording-status');
            const data = await response.json();

            if (data.recording) {
                statusText.textContent = 'Recording...';
                statusText.classList.remove('text-green-400');
                statusText.classList.add('text-purple-400');
                statusIcon.classList.add('hidden');
                statusSpinner.classList.remove('hidden');

                // Treat a missing segment list like an empty one.
                const segments = Array.isArray(data.live_segments) ? data.live_segments : [];

                if (segments.length === 0) {
                    const waiting = document.createElement('p');
                    waiting.className = 'text-gray-400 text-center text-lg mt-12';
                    waiting.textContent = 'Recording started. Waiting for transcription...';
                    liveTranscription.replaceChildren(waiting);
                } else {
                    // Build everything off-DOM, then swap it in with one call.
                    const fragment = document.createDocumentFragment();
                    segments.forEach(segment => {
                        const row = document.createElement('p');
                        row.className = 'text-gray-200 mb-1 leading-snug';

                        const speaker = document.createElement('span');
                        speaker.className = 'font-semibold text-purple-300';
                        speaker.textContent = `${segment.speaker}:`;

                        row.append(speaker, ` ${segment.text}`);
                        fragment.appendChild(row);
                    });
                    liveTranscription.replaceChildren(fragment);
                    // Keep the newest segment in view.
                    liveTranscription.scrollTop = liveTranscription.scrollHeight;
                }
                fetchFinalFiles();

            } else {
                statusText.textContent = 'Ready to record';
                statusText.classList.remove('text-purple-400');
                statusText.classList.add('text-green-400');
                statusIcon.classList.remove('hidden');
                statusSpinner.classList.add('hidden');
                clearInterval(statusPollingInterval);
                startBtn.classList.remove('hidden');
                stopBtn.classList.add('hidden');
                fetchFinalFiles();
            }
        } catch (error) {
            console.error('Error polling status:', error);
            clearInterval(statusPollingInterval);
        }
    }, 1000);
}
247
+
248
// Start recording
// Collects the form values, posts them to the server and, once the server
// accepts the request, switches the UI into recording mode and begins
// status polling. A rejected request shows the server's error message; a
// failed request shows a generic connection error.
startBtn.addEventListener('click', async () => {
    // The "None" entry of the system-audio select carries the string
    // "null"; it is sent to the server as a real JSON null.
    let sys = sysSelect.value;
    if (sys === 'null') {
        sys = null;
    }

    const payload = {
        mic: micSelect.value,
        sys,
        chunk_secs: chunkSecsInput.value,
        model: modelInput.value,
        no_transcribe: noTranscribeCheckbox.checked
    };

    try {
        const response = await fetch('/api/start-recording', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });

        if (!response.ok) {
            const failure = await response.json();
            alert(`Error: ${failure.error}`);
            return;
        }

        // Request accepted: swap the buttons and start following progress.
        startBtn.classList.add('hidden');
        stopBtn.classList.remove('hidden');
        liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Starting recording...</p>`;
        startStatusPolling();
    } catch (error) {
        console.error('Failed to start recording:', error);
        alert('Failed to start recording. Check server connection.');
    }
});
277
+
278
// Stop recording
// Asks the server to stop the current recording. On success nothing is
// changed here: the status poller notices that recording has ended and
// resets the UI. A rejected or failed request is reported to the user in
// the same way as a failed start, instead of being silently ignored.
stopBtn.addEventListener('click', async () => {
    try {
        const response = await fetch('/api/stop-recording', {
            method: 'POST'
        });

        if (!response.ok) {
            // Prefer the server's own message; fall back to the HTTP status
            // when the body is missing or is not JSON.
            let detail = `HTTP ${response.status}`;
            try {
                const body = await response.json();
                if (body && body.error) {
                    detail = body.error;
                }
            } catch (parseError) {
                // Non-JSON error body: keep the HTTP status as the detail.
            }
            console.error('Server did not stop recording:', detail);
            alert(`Error: ${detail}`);
        }
    } catch (error) {
        console.error('Failed to stop recording:', error);
        alert('Failed to stop recording. Check server connection.');
    }
});
291
+
292
// Initial setup on page load
// Once the page has finished loading, fill the device dropdowns and show
// any recordings that already exist on the server.
window.onload = function () {
    fetchDevices();
    fetchFinalFiles();
};
297
+
298
+ </script>
299
+ </body>
300
+ </html>