Spaces:
Sleeping
Sleeping
Upload 12 files
Browse files- Dockerfile +12 -0
- app.py +312 -0
- app2.py +291 -0
- merged.py +559 -0
- rec_transcribe_extension.py +345 -0
- requirements.txt +171 -0
- static/icon_upload.png +0 -0
- templates/index2.html +753 -0
- templates/index2_upload.html +736 -0
- templates/landing.html +160 -0
- templates/test_index.html +292 -0
- templates/test_index3.html +300 -0
Dockerfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12.2

WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application sources change most often, so they are copied last.
COPY . .

EXPOSE 7860

CMD ["python", "merged.py"]
|
app.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, send_from_directory, render_template
|
| 2 |
+
import threading
|
| 3 |
+
import time
|
| 4 |
+
import os
|
| 5 |
+
import queue
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import pyaudio
|
| 8 |
+
from werkzeug.utils import secure_filename
|
| 9 |
+
from rec_transcribe_extension import Transcriber, diarization_hook
|
| 10 |
+
from rec_transcribe_extension import (
|
| 11 |
+
list_input_devices,
|
| 12 |
+
run_recording,
|
| 13 |
+
OUTPUT_DIR,
|
| 14 |
+
CHUNKS_DIR,
|
| 15 |
+
FINAL_WAV,)
|
| 16 |
+
|
| 17 |
+
app = Flask(__name__)

# Background recording thread; None until a recording has been started.
recording_thread = None
recording_running = False
# Event used to ask the recorder to stop. Defined up front so the stop
# endpoint can be called before any recording was started without raising
# NameError; the start endpoint replaces it with a fresh threading.Event.
stop_event = None

# State returned as JSON by /api/recording-status.
recording_status = {
    "recording": False,
    "live_segments": [],
}
|
| 26 |
+
|
| 27 |
+
# ------ Device Listing API ------
|
| 28 |
+
@app.route("/api/devices", methods=["GET"])
def api_devices():
    """List the audio devices that expose at least one input channel.

    Returns:
        JSON of the form ``{"devices": [{"index": ..., "name": ...}, ...]}``.
    """
    pa = pyaudio.PyAudio()
    try:
        infos = (
            pa.get_device_info_by_index(i)
            for i in range(pa.get_device_count())
        )
        devices = [
            {"index": info["index"], "name": info["name"]}
            for info in infos
            if info.get("maxInputChannels", 0) > 0
        ]
    finally:
        # Release the PyAudio instance even if querying a device raises.
        pa.terminate()
    return jsonify({"devices": devices})
|
| 38 |
+
|
| 39 |
+
# --- Start recording ---
|
| 40 |
+
@app.route("/api/start-recording", methods=["POST"])
def api_start_recording():
    """Validate the request and start recording in a background thread.

    Expected JSON object fields:
        mic: required input device index.
        sys: optional second input device index; ``None``, ``""`` and
            ``"null"`` are treated as "not provided".
        chunk_secs: optional positive integer, default 5.
        model: optional model name handed to ``run_recording``,
            default ``"medium"``.
        no_transcribe: optional flag, default false.

    Returns:
        ``{"ok": true}`` once the thread is started, otherwise
        ``{"error": ...}`` with status 400 (bad input / already recording)
        or 500 (PyAudio could not be initialised).
    """
    global recording_thread, stop_event, recording_status

    # silent=True: a missing or malformed JSON body falls through to the
    # 'mic' validation below and produces a JSON 400 instead of an
    # AttributeError on None.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    bad_number = (TypeError, ValueError, OverflowError)

    # Validate required fields
    try:
        mic = int(data.get("mic"))
    except bad_number:
        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400

    sys_index = None
    if data.get("sys") not in (None, "", "null"):
        try:
            sys_index = int(data.get("sys"))
        except bad_number:
            return jsonify({"error": "Invalid 'sys' parameter"}), 400

    try:
        chunk_secs = int(data.get("chunk_secs", 5))
    except bad_number:
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400
    if chunk_secs <= 0:
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400

    model = data.get("model", "medium")
    no_transcribe = bool(data.get("no_transcribe", False))
    if recording_status["recording"]:
        return jsonify({"error": "Already recording"}), 400

    # --- Validate that requested devices exist and have input channels ---
    try:
        pa = pyaudio.PyAudio()
    except Exception as e:
        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500

    def device_is_valid(device_index):
        """Return True if the device can be queried and has input channels."""
        try:
            dev = pa.get_device_info_by_index(device_index)
            return dev.get("maxInputChannels", 0) > 0
        except Exception:
            return False

    try:
        if not device_is_valid(mic):
            return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
        if sys_index is not None and not device_is_valid(sys_index):
            return jsonify({"error": f"System device index {sys_index} not found or has no input channels"}), 400
    finally:
        # Single release point for every exit path of the validation.
        pa.terminate()

    # Reset state
    recording_status["recording"] = True
    recording_status["live_segments"] = []
    event = threading.Event()
    stop_event = event

    def run():
        """Thread body: patch the chunk worker, record, then restore state."""
        try:
            import rec_transcribe_extension as rte

            orig_worker = rte.chunk_writer_and_transcribe_worker

            def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
                """Chunk worker that also appends segments to live_segments."""
                while True:
                    try:
                        filename, frames = in_queue.get(timeout=1.0)
                    except queue.Empty:
                        # Exit only once a stop was requested and the queue is drained.
                        if event.is_set() and in_queue.empty():
                            break
                        continue

                    rte.save_wav_from_frames(
                        filename, frames, nchannels=rte.CHANNELS)
                    final_frames_list.extend(frames)

                    diar_segments = rte.diarization_hook(str(filename)) or []

                    # Transcribe chunk and get segments with timestamps
                    if transcriber and transcriber.model:
                        try:
                            segments, info = transcriber.model.transcribe(
                                str(filename), beam_size=5)
                            for seg in segments:
                                seg_start = seg.start
                                seg_end = seg.end
                                seg_text = seg.text.strip()
                                # First diarization segment that overlaps wins.
                                speaker = "Unknown"
                                for d_start, d_end, d_speaker in diar_segments:
                                    if (seg_start < d_end) and (seg_end > d_start):
                                        speaker = d_speaker
                                        break
                                # Update live_segments for frontend
                                recording_status["live_segments"].append({
                                    "start": float(seg_start),
                                    "end": float(seg_end),
                                    "speaker": str(speaker),
                                    "text": seg_text
                                })
                                # Write to transcript file as before
                                line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                                with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                                    tf.write(line)
                        except Exception as e:
                            print(f"Transcription error for {filename.name}: {e}")
                print("Chunk writer/transcriber worker exiting.")

            rte.chunk_writer_and_transcribe_worker = patched_worker
            try:
                rte.stop_event = event
                run_recording(mic_index=mic, sys_index=sys_index, chunk_secs=chunk_secs,
                              model_name=model, no_transcribe=no_transcribe)
            finally:
                rte.chunk_writer_and_transcribe_worker = orig_worker
        finally:
            # Always clear the flag, even when setup fails before recording
            # starts; otherwise every later request would be rejected with
            # "Already recording".
            recording_status["recording"] = False

    recording_thread = threading.Thread(target=run, daemon=True)
    recording_thread.start()
    return jsonify({"ok": True})
|
| 157 |
+
|
| 158 |
+
# --- Stop recording ---
|
| 159 |
+
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
    """Signal the current recording, if any, to stop.

    Returns:
        JSON ``{"ok": true}`` whether or not a recording was running.
    """
    # Looked up through globals() because the event is only created by the
    # start endpoint; a stop request arriving first must not raise NameError.
    event = globals().get("stop_event")
    if event is not None:
        event.set()
    return jsonify({"ok": True})
|
| 165 |
+
|
| 166 |
+
# --- Poll status ---
|
| 167 |
+
@app.route("/api/recording-status")
def api_recording_status():
    """Return the shared ``recording_status`` dict as JSON."""
    current = recording_status
    return jsonify(current)
|
| 170 |
+
|
| 171 |
+
# # serve saved uploads at /uploads/<filename>
|
| 172 |
+
# @app.route('/uploads/<path:filename>')
|
| 173 |
+
# def serve_uploaded(filename):
|
| 174 |
+
# return send_from_directory(str(OUTPUT_DIR), filename)
|
| 175 |
+
|
| 176 |
+
# # --- upload pre-recorded files ---
|
| 177 |
+
# @app.route("/api/upload", methods=["POST"])
|
| 178 |
+
# def api_upload_file():
|
| 179 |
+
# """
|
| 180 |
+
# Accept a single file (form-data 'file'), save it into OUTPUT_DIR and return json
|
| 181 |
+
# { ok: True, filename: "<saved_name>", url: "/static/<saved_name>" }.
|
| 182 |
+
# """
|
| 183 |
+
# if 'file' not in request.files:
|
| 184 |
+
# return jsonify({"error": "No file provided"}), 400
|
| 185 |
+
# f = request.files['file']
|
| 186 |
+
# if f.filename == '':
|
| 187 |
+
# return jsonify({"error": "Empty filename"}), 400
|
| 188 |
+
|
| 189 |
+
# safe_name = secure_filename(f.filename)
|
| 190 |
+
# # prefix timestamp to avoid collisions
|
| 191 |
+
# ts = int(time.time() * 1000)
|
| 192 |
+
# saved_name = f"{ts}_{safe_name}"
|
| 193 |
+
# saved_path = OUTPUT_DIR / saved_name
|
| 194 |
+
# try:
|
| 195 |
+
# f.save(str(saved_path))
|
| 196 |
+
# except Exception as e:
|
| 197 |
+
# return jsonify({"error": f"Failed to save file: {e}"}), 500
|
| 198 |
+
|
| 199 |
+
# return jsonify({"ok": True, "filename": saved_name, "url": f"/static/{saved_name}"})
|
| 200 |
+
|
| 201 |
+
# # --- Start server-side paced transcription for a saved WAV/MP3 file ---
|
| 202 |
+
# @app.route("/api/start-transcribe-file", methods=["POST"])
|
| 203 |
+
# def api_start_transcribe_file():
|
| 204 |
+
# """
|
| 205 |
+
# POST JSON { filename: "<saved_name>" }
|
| 206 |
+
# Spawns a background thread that transcribes the file using the Transcriber,
|
| 207 |
+
# and appends transcribed segments (with start/end/speaker/text) into
|
| 208 |
+
# recording_status["live_segments"] while setting recording_status["recording"]=True.
|
| 209 |
+
# The worker will pace segments to approximate 'live' streaming using seg.start timestamps.
|
| 210 |
+
# """
|
| 211 |
+
# global recording_status
|
| 212 |
+
# data = request.json or {}
|
| 213 |
+
# filename = data.get("filename")
|
| 214 |
+
# print("DEBUG: /api/start-transcribe-file called with:", filename, flush=True)
|
| 215 |
+
|
| 216 |
+
# if not filename:
|
| 217 |
+
# return jsonify({"error": "Missing 'filename'"}), 400
|
| 218 |
+
|
| 219 |
+
# file_path = OUTPUT_DIR / filename
|
| 220 |
+
# if not file_path.exists():
|
| 221 |
+
# return jsonify({"error": "File not found on server"}), 404
|
| 222 |
+
|
| 223 |
+
# # prevent concurrent transcription runs
|
| 224 |
+
# if recording_status.get("recording"):
|
| 225 |
+
# return jsonify({"error": "Another transcription/recording is already running"}), 400
|
| 226 |
+
|
| 227 |
+
# def worker():
|
| 228 |
+
# try:
|
| 229 |
+
# recording_status["recording"] = True
|
| 230 |
+
# recording_status["live_segments"] = []
|
| 231 |
+
|
| 232 |
+
# transcriber = Transcriber()
|
| 233 |
+
# if not transcriber.model:
|
| 234 |
+
# # model not loaded/available
|
| 235 |
+
# recording_status["recording"] = False
|
| 236 |
+
# print("Transcription model not available; cannot transcribe file.")
|
| 237 |
+
# return
|
| 238 |
+
|
| 239 |
+
# # perform diarization if available
|
| 240 |
+
# diar_segments = diarization_hook(str(file_path)) or []
|
| 241 |
+
|
| 242 |
+
# # get segments from model
|
| 243 |
+
# try:
|
| 244 |
+
# segments, info = transcriber.model.transcribe(str(file_path), beam_size=5)
|
| 245 |
+
# except Exception as e:
|
| 246 |
+
# print("Error during transcription:", e)
|
| 247 |
+
# recording_status["recording"] = False
|
| 248 |
+
# return
|
| 249 |
+
|
| 250 |
+
# # Stream the segments into recording_status with timing
|
| 251 |
+
# start_clock = time.time()
|
| 252 |
+
# for seg in segments:
|
| 253 |
+
# # seg.start is seconds into the audio
|
| 254 |
+
# wait_for = seg.start - (time.time() - start_clock)
|
| 255 |
+
# if wait_for > 0:
|
| 256 |
+
# time.sleep(wait_for)
|
| 257 |
+
|
| 258 |
+
# # map speaker using diarization segments (best-effort overlap)
|
| 259 |
+
# speaker = "Unknown"
|
| 260 |
+
# for d_start, d_end, d_label in diar_segments:
|
| 261 |
+
# if (seg.start < d_end) and (seg.end > d_start):
|
| 262 |
+
# speaker = d_label
|
| 263 |
+
# break
|
| 264 |
+
|
| 265 |
+
# seg_obj = {
|
| 266 |
+
# "start": float(seg.start),
|
| 267 |
+
# "end": float(seg.end),
|
| 268 |
+
# "speaker": str(speaker),
|
| 269 |
+
# "text": seg.text.strip()
|
| 270 |
+
# }
|
| 271 |
+
|
| 272 |
+
# # append to shared status for frontend polling
|
| 273 |
+
# recording_status.setdefault("live_segments", []).append(seg_obj)
|
| 274 |
+
|
| 275 |
+
# # also append to transcript file for persistence (optional)
|
| 276 |
+
# with open(rec_transcribe_extension.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
|
| 277 |
+
# line = f"[{filename}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
|
| 278 |
+
# tf.write(line)
|
| 279 |
+
|
| 280 |
+
# # done streaming
|
| 281 |
+
# recording_status["recording"] = False
|
| 282 |
+
# except Exception as e:
|
| 283 |
+
# print("Error in transcription worker:", e)
|
| 284 |
+
# recording_status["recording"] = False
|
| 285 |
+
|
| 286 |
+
# t = threading.Thread(target=worker, daemon=True)
|
| 287 |
+
# t.start()
|
| 288 |
+
# return jsonify({"ok": True})
|
| 289 |
+
|
| 290 |
+
# --- List final files ---
|
| 291 |
+
@app.route("/api/final-files")
def api_final_files():
    """List the ``.wav`` and ``.txt`` files found in ``OUTPUT_DIR``.

    Returns:
        JSON ``{"files": [{"name", "path", "url"}, ...]}`` sorted by name.
        The list is empty when the output directory does not exist.
    """
    try:
        names = sorted(os.listdir(OUTPUT_DIR))
    except FileNotFoundError:
        # No output directory: report an empty listing instead of a 500.
        names = []

    files = [
        {"name": name, "path": f"/static/{name}", "url": f"/static/{name}"}
        for name in names
        if name.endswith((".wav", ".txt"))
    ]
    return jsonify({"files": files})
|
| 300 |
+
|
| 301 |
+
# --- Serve static files (WAV, TXT) ---
|
| 302 |
+
@app.route('/static/<path:filename>')
def static_files(filename):
    """Serve ``filename`` from ``OUTPUT_DIR``."""
    # NOTE(review): Flask(__name__) registers its own /static/<path:filename>
    # rule by default -- confirm this handler is the one that serves these URLs.
    directory = OUTPUT_DIR
    return send_from_directory(directory, filename)
|
| 305 |
+
|
| 306 |
+
# --- Serve the frontend ---
|
| 307 |
+
@app.route("/")
def index():
    """Render the frontend page."""
    template_name = "index2.html"
    return render_template(template_name)
|
| 310 |
+
|
| 311 |
+
if __name__ == "__main__":
    # Script entry point: Flask development server on port 5000, debug on.
    run_options = {"port": 5000, "debug": True}
    app.run(**run_options)
|
app2.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app2.py
|
| 2 |
+
"""
|
| 3 |
+
Flask app to:
|
| 4 |
+
1) serve the provided upload template,
|
| 5 |
+
2) accept .mp3/.wav uploads and show an audio player,
|
| 6 |
+
3) start/stop recording from a system loopback device when the audio element plays/pauses,
|
| 7 |
+
4) stream live transcription back to the browser via Server-Sent Events (SSE).
|
| 8 |
+
|
| 9 |
+
Notes:
|
| 10 |
+
- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
|
| 11 |
+
- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
|
| 12 |
+
- This app assumes it runs on the same machine that has access to the local audio devices.
|
| 13 |
+
"""
|
| 14 |
+
import os
|
| 15 |
+
import time
|
| 16 |
+
import threading
|
| 17 |
+
import pathlib
|
| 18 |
+
from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
|
| 19 |
+
from werkzeug.utils import secure_filename
|
| 20 |
+
|
| 21 |
+
# import your recorder/transcriber helper (uploaded by you)
|
| 22 |
+
import rec_transcribe_extension as rte
|
| 23 |
+
|
| 24 |
+
# Directory (relative to the working directory) that receives uploads;
# created at import time if it does not exist yet.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}

app = Flask(__name__, static_folder=None)
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)

# Globals for recording thread management
recording_lock = threading.Lock()
recording_thread = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def allowed_file(filename):
    """Return True when the extension of ``filename`` is in ``ALLOWED_EXT``.

    The comparison is case-insensitive.
    """
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def find_system_loopback_index():
    """Pick an input device index that is likely to capture system audio.

    Heuristic: return the first device with input channels whose lower-cased
    name contains one of the keywords below ('loop', 'stereo', 'mix', ...).
    If none matches, fall back to the default input device.

    Returns:
        The device index as ``int``, or ``None`` when PyAudio cannot be
        imported/initialised or no default input device is available.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # PyAudio missing or failed to initialise: nothing can be selected.
        return None

    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "loopback", "monitor")
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # Best effort: a device that cannot be queried is skipped.
                continue

        # fallback: default input device
        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        # Single release point for every exit path.
        pa.terminate()
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@app.route("/", methods=["GET"])
def index():
    """Render the upload page."""
    template_name = "index2_upload.html"
    return render_template(template_name)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@app.route("/upload", methods=["POST"])
def upload():
    """Store an uploaded audio file and return its URL.

    Expects a multipart form field named ``file``. The stored name is the
    sanitised original name prefixed with a millisecond timestamp.

    Returns:
        ``{"success": true, "url": ..., "filename": ...}`` on success;
        ``{"success": false, "error": ...}`` with status 400 for a missing
        part, empty filename or disallowed extension, and 500 if the file
        cannot be written.
    """
    f = request.files.get('file')
    if f is None:
        return jsonify(success=False, error="No file part"), 400
    # "not f.filename" also covers a filename of None, not just ''.
    if not f.filename:
        return jsonify(success=False, error="Empty filename"), 400
    filename = secure_filename(f.filename)
    if not allowed_file(filename):
        return jsonify(success=False, error="Extension not allowed"), 400

    # avoid collisions by prefixing timestamp
    filename = f"{int(time.time() * 1000)}_{filename}"
    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    try:
        f.save(save_path)
    except OSError as e:
        # Keep the JSON error contract instead of a generic 500 page.
        return jsonify(success=False, error=f"Failed to save file: {e}"), 500
    return jsonify(success=True, url=f"/uploads/{filename}", filename=filename)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline from the upload folder."""
    folder = app.config['UPLOAD_FOLDER']
    return send_from_directory(folder, filename, as_attachment=False)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
@app.route("/start", methods=["POST"])
def start_recording():
    """Start a background thread that runs ``rte.run_recording``.

    A loopback-like input device is preferred; if none is found the default
    input device is used (see ``find_system_loopback_index``). While the
    recording runs, ``rte.chunk_writer_and_transcribe_worker`` is replaced by
    a worker that appends one formatted line per transcribed segment to
    ``rte.TRANSCRIPT_FILE``; the original worker is restored afterwards.

    Optional JSON field ``filename``: if given, it must name a file in the
    upload folder (a sanity check only; the file is not played here).

    Returns:
        ``{"success": true, "message": ...}`` when a recording was started
        or is already running; ``{"success": false, "error": ...}`` with
        status 400 (unknown file) or 500 (no input device).
    """
    global recording_thread
    # Imported here because this module's import block does not provide
    # `queue`, which the patched worker needs for queue.Empty.
    import queue

    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        body = {}
    filename = body.get('filename')

    # Basic check: uploaded file exists (we don't actually play the file on the server,
    # but it's a sanity check so user didn't start without uploading)
    if filename:
        known = (
            isinstance(filename, str)
            # Reject names with path components so the existence probe
            # cannot be pointed outside the upload folder.
            and os.path.basename(filename) == filename
            and os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        )
        if not known:
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # if there's an active recording, return ok
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # Always install a fresh, unset event: the patched worker and the
        # /stop endpoint both read rte.stop_event.
        rte.stop_event = threading.Event()

        # choose device: prefer loopback
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        def target():
            """Thread body: patch the chunk worker, record, restore it."""
            try:
                orig_worker = rte.chunk_writer_and_transcribe_worker

                def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
                    """Chunk worker writing 'Speaker X: text' transcript lines."""
                    while True:
                        try:
                            chunk_path, frames = in_queue.get(timeout=1.0)
                        except queue.Empty:
                            # Exit only once stop was requested and the queue is drained.
                            if rte.stop_event.is_set() and in_queue.empty():
                                break
                            continue

                        rte.save_wav_from_frames(
                            chunk_path, frames, nchannels=rte.CHANNELS)
                        final_frames_list.extend(frames)

                        diar_segments = rte.diarization_hook(str(chunk_path)) or []

                        if transcriber and transcriber.model:
                            try:
                                segments, info = transcriber.model.transcribe(
                                    str(chunk_path), beam_size=5)
                                for seg in segments:
                                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                                    # First overlapping diarization segment wins.
                                    speaker = "Unknown"
                                    for d_start, d_end, d_speaker in diar_segments:
                                        if (seg_start < d_end) and (seg_end > d_start):
                                            speaker = d_speaker
                                            break
                                    # Write formatted diarization line
                                    line = f"[{pathlib.Path(chunk_path).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                                        tf.write(line)
                            except Exception as e:
                                print(f"Transcription error for {chunk_path}: {e}")

                    print("Patched worker exiting.")

                # apply patch
                rte.chunk_writer_and_transcribe_worker = patched_worker
                try:
                    rte.run_recording(mic_index=dev_index, sys_index=None,
                                      chunk_secs=getattr(
                                          rte, 'CHUNK_DURATION_SECS', 3),
                                      model_name=getattr(rte, 'MODEL_NAME', None),
                                      no_transcribe=False)
                finally:
                    rte.chunk_writer_and_transcribe_worker = orig_worker
            except Exception as e:
                print("run_recording exception:", e)

        # Actually launch the recorder, and return a response so the view
        # does not fall through and return None.
        recording_thread = threading.Thread(target=target, daemon=True)
        recording_thread.start()

    return jsonify(success=True, message="Recording started")
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Set ``rte.stop_event`` (when present) to request a graceful stop.

    Always responds with ``{"success": true, "message": "Stop signal sent"}``.
    """
    with recording_lock:
        # set the stop_event in module
        event = getattr(rte, 'stop_event', None)
        if event is not None:
            try:
                event.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file and yield its lines as SSE ``data:`` events.

    Args:
        path: Path of the transcript file; it may not exist yet.
        stop_cond_fn: Optional callable; once it returns a truthy value the
            file is read one last time and the generator finishes.

    Yields:
        ``"data: <line>\\n\\n"`` for every non-blank line, a one-time
        ``[info]`` notice while the file does not exist yet, and a final
        ``[info] Transcription ended.`` event.
    """
    offset = 0       # byte offset of the first unread byte
    pending = b""    # tail of a line that is not newline-terminated yet
    sent_initial = False

    while True:
        # Evaluate the stop condition BEFORE reading so everything written
        # up to that point is still delivered by the read below.
        stopping = bool(stop_cond_fn and stop_cond_fn())

        try:
            with open(path, "rb") as fh:
                if os.fstat(fh.fileno()).st_size < offset:
                    # File was truncated or recreated: start over.
                    offset, pending = 0, b""
                fh.seek(offset)
                chunk = fh.read()
        except FileNotFoundError:
            chunk = None

        if chunk is None:
            if stopping:
                break
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
            continue

        offset += len(chunk)
        # Hold back a trailing partial line until its newline arrives, so a
        # line being written concurrently is not split across two events.
        *complete, pending = (pending + chunk).split(b"\n")
        if stopping and pending:
            complete.append(pending)
            pending = b""

        if complete:
            sent_initial = True
        for raw in complete:
            text = raw.decode("utf-8", errors="ignore").strip()
            if text:
                yield f"data: {text}\n\n"

        if stopping:
            break
        if not complete:
            # no new lines
            time.sleep(0.25)

    # final notification
    yield "data: [info] Transcription ended.\n\n"
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
@app.route("/events")
def events():
    """Stream new lines of ``rte.TRANSCRIPT_FILE`` as Server-Sent Events.

    The stream ends once ``rte.stop_event`` is set and the background
    recording thread is no longer alive. Responds with status 500 when the
    module defines no transcript file.
    """
    configured = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured:
        return Response("No transcript file configured", status=500)

    def stop_fn():
        """Return True when stop was requested and the recorder has exited."""
        try:
            event = getattr(rte, 'stop_event', None)
            stop_requested = event is not None and event.is_set()
        except Exception:
            stop_requested = False
        if not stop_requested:
            return False
        worker = recording_thread
        return worker is None or not worker.is_alive()

    stream = tail_transcript_file(str(configured), stop_cond_fn=stop_fn)
    return Response(stream_with_context(stream), mimetype="text/event-stream")
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
@app.route("/status")
def status():
    """Report as JSON whether the recording thread is currently alive."""
    worker = recording_thread
    is_running = bool(worker and worker.is_alive())
    return jsonify(running=is_running)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
if __name__ == "__main__":
    # Binds to all interfaces (0.0.0.0) on port 7860 with threading enabled,
    # so the server is reachable from other hosts, not only from localhost.
    app.run(host="0.0.0.0", port=7860, threaded=True)
|
merged.py
ADDED
|
@@ -0,0 +1,559 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# merged.py
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
import threading
|
| 5 |
+
import queue
|
| 6 |
+
import pathlib
|
| 7 |
+
import pyaudio
|
| 8 |
+
from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
|
| 9 |
+
from werkzeug.utils import secure_filename
|
| 10 |
+
|
| 11 |
+
# your helper module
|
| 12 |
+
import rec_transcribe_extension as rte
|
| 13 |
+
from rec_transcribe_extension import Transcriber, diarization_hook, run_recording, OUTPUT_DIR
|
| 14 |
+
|
| 15 |
+
# Flask application object; all routes below are registered on it.
app = Flask(__name__)
# Directory (relative to the working directory) where uploaded audio is stored.
UPLOAD_FOLDER = "uploads"
# Created at import time so the upload endpoint can save files immediately.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Lower-case file suffixes accepted by allowed_file().
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def allowed_file(filename: str) -> bool:
    """Return True when the filename's suffix (case-insensitive) is in ALLOWED_EXT."""
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ---------------- Shared state ----------------
# Background thread running the current recording; assigned by the start
# endpoints and inspected by /events and /status.
recording_thread = None
# NOTE(review): not referenced anywhere in the visible part of this file.
recording_running = False
# Held by /api/start-transcribe-file and /stop while they touch shared state.
recording_lock = threading.Lock()

# Snapshot returned by /api/recording-status:
#   "recording"     - True while the /api/start-recording worker is running
#   "live_segments" - list of {"start", "end", "speaker", "text"} dicts
recording_status = {
    "recording": False,
    "live_segments": []
}
|
| 38 |
+
|
| 39 |
+
# ---------------- Landing + Frontend ----------------
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@app.route("/")
def landing():
    """Serve the landing page template."""
    template_name = "landing.html"
    return render_template(template_name)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@app.route("/live")
def live_page():
    """Serve the live-recording page template."""
    template_name = "index2.html"
    return render_template(template_name)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@app.route("/upload")
def upload_page():
    """Serve the file-upload page template."""
    template_name = "index2_upload.html"
    return render_template(template_name)
|
| 55 |
+
|
| 56 |
+
# ---------------- Device listing ----------------
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@app.route("/api/devices", methods=["GET"])
def api_devices():
    """List audio devices that expose at least one input channel.

    Returns:
        JSON ``{"devices": [{"index": ..., "name": ...}, ...]}``.
    """
    pa = pyaudio.PyAudio()
    try:
        devices = []
        for i in range(pa.get_device_count()):
            dev = pa.get_device_info_by_index(i)
            if dev.get("maxInputChannels", 0) > 0:
                devices.append({"index": dev["index"], "name": dev["name"]})
    finally:
        # Release the PortAudio handle even when enumeration raises.
        pa.terminate()
    return jsonify({"devices": devices})
|
| 69 |
+
|
| 70 |
+
# --- Start recording ---
|
| 71 |
+
@app.route("/api/start-recording", methods=["POST"])
def api_start_recording():
    """Validate the request and start a background recording thread.

    JSON body keys:
        mic: required input device index.
        sys: optional second input device index (None, "" or "null" = unset).
        chunk_secs: chunk length in seconds, positive integer (default 5).
        model: model name passed to run_recording (default "medium").
        no_transcribe: truthy to disable transcription.

    Returns:
        ``{"ok": true}`` once the thread has been started; HTTP 400 for
        invalid parameters, unusable devices or an already-running recording;
        HTTP 500 when PyAudio cannot be initialised.
    """
    global recording_thread, stop_event, recording_status

    # A missing, malformed or non-object body is treated as "no parameters"
    # so that it yields the 400 below instead of an AttributeError.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    bad_number = (TypeError, ValueError, OverflowError)

    try:
        mic = int(data.get("mic"))
    except bad_number:
        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400

    sys_index = None
    if data.get("sys") not in (None, "", "null"):
        try:
            sys_index = int(data.get("sys"))
        except bad_number:
            return jsonify({"error": "Invalid 'sys' parameter"}), 400

    try:
        chunk_secs = int(data.get("chunk_secs", 5))
    except bad_number:
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400
    if chunk_secs <= 0:
        return jsonify({"error": "Invalid 'chunk_secs' parameter"}), 400

    model = data.get("model", "medium")
    no_transcribe = bool(data.get("no_transcribe", False))

    # Cheap early exit; the authoritative check is repeated under the lock.
    if recording_status["recording"]:
        return jsonify({"error": "Already recording"}), 400

    # --- Validate that requested devices exist and have input channels ---
    try:
        pa = pyaudio.PyAudio()
    except Exception as e:
        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500

    def device_is_valid(device_index):
        try:
            dev = pa.get_device_info_by_index(device_index)
            return dev.get("maxInputChannels", 0) > 0
        except Exception:
            return False

    try:
        mic_ok = device_is_valid(mic)
        sys_ok = sys_index is None or device_is_valid(sys_index)
    finally:
        pa.terminate()

    if not mic_ok:
        return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
    if not sys_ok:
        return jsonify({"error": f"System device index {sys_index} not found or has no input channels"}), 400

    worker_stop = threading.Event()

    def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
        # Replacement for rte.chunk_writer_and_transcribe_worker that also
        # publishes each transcribed segment to recording_status for the UI.
        while True:
            try:
                filename, frames = in_queue.get(timeout=1.0)
            except queue.Empty:
                if worker_stop.is_set() and in_queue.empty():
                    break
                continue

            rte.save_wav_from_frames(
                filename, frames, nchannels=rte.CHANNELS)
            final_frames_list.extend(frames)

            diar_segments = rte.diarization_hook(str(filename)) or []

            # Transcribe chunk and get segments with timestamps
            if transcriber and transcriber.model:
                try:
                    segments, info = transcriber.model.transcribe(
                        str(filename), beam_size=5)
                    for seg in segments:
                        seg_start = seg.start
                        seg_end = seg.end
                        seg_text = seg.text.strip()
                        # First diarization turn overlapping the segment wins.
                        speaker = "Unknown"
                        for d_start, d_end, d_speaker in diar_segments:
                            if (seg_start < d_end) and (seg_end > d_start):
                                speaker = d_speaker
                                break
                        # Update live_segments for frontend
                        recording_status["live_segments"].append({
                            "start": float(seg_start),
                            "end": float(seg_end),
                            "speaker": str(speaker),
                            "text": seg_text
                        })
                        # Write to transcript file as before
                        line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                        with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                            tf.write(line)
                except Exception as e:
                    print(f"Transcription error for {filename.name}: {e}")
        print("Chunk writer/transcriber worker exiting.")

    def run():
        # Monkey-patch the helper module's worker for the duration of this run.
        orig_worker = rte.chunk_writer_and_transcribe_worker
        rte.chunk_writer_and_transcribe_worker = patched_worker
        try:
            run_recording(mic_index=mic, sys_index=sys_index, chunk_secs=chunk_secs,
                          model_name=model, no_transcribe=no_transcribe)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
            recording_status["recording"] = False

    with recording_lock:
        # Check-and-set under the lock so two concurrent requests cannot both start.
        other_running = recording_thread is not None and recording_thread.is_alive()
        if recording_status["recording"] or other_running:
            return jsonify({"error": "Already recording"}), 400

        # Reset state
        recording_status["recording"] = True
        recording_status["live_segments"] = []
        stop_event = worker_stop
        # Publish the event before the thread starts so that a stop request
        # arriving immediately afterwards signals this run, not a stale event.
        rte.stop_event = worker_stop

        recording_thread = threading.Thread(target=run, daemon=True)
        recording_thread.start()
    return jsonify({"ok": True})
|
| 188 |
+
|
| 189 |
+
# # ---------------- Recording APIs ----------------
|
| 190 |
+
# @app.route("/api/start-recording", methods=["POST"])
|
| 191 |
+
# def api_start_recording():
|
| 192 |
+
# global recording_thread, recording_status
|
| 193 |
+
# data = request.json or {}
|
| 194 |
+
|
| 195 |
+
# mic = int(data.get("mic", -1))
|
| 196 |
+
# sys = data.get("sys")
|
| 197 |
+
# if sys in (None, "", "null"):
|
| 198 |
+
# sys = None
|
| 199 |
+
# else:
|
| 200 |
+
# sys = int(sys)
|
| 201 |
+
|
| 202 |
+
# chunk_secs = int(data.get("chunk_secs", 5))
|
| 203 |
+
# model = data.get("model", "medium")
|
| 204 |
+
# no_transcribe = bool(data.get("no_transcribe", False))
|
| 205 |
+
|
| 206 |
+
# if recording_status["recording"]:
|
| 207 |
+
# return jsonify({"error": "Already recording"}), 400
|
| 208 |
+
|
| 209 |
+
# # validate devices
|
| 210 |
+
# pa = pyaudio.PyAudio()
|
| 211 |
+
# def valid(dev_idx):
|
| 212 |
+
# try:
|
| 213 |
+
# dev = pa.get_device_info_by_index(dev_idx)
|
| 214 |
+
# return dev.get("maxInputChannels", 0) > 0
|
| 215 |
+
# except Exception:
|
| 216 |
+
# return False
|
| 217 |
+
# if not valid(mic):
|
| 218 |
+
# pa.terminate()
|
| 219 |
+
# return jsonify({"error": f"Mic device {mic} invalid"}), 400
|
| 220 |
+
# if sys is not None and not valid(sys):
|
| 221 |
+
# pa.terminate()
|
| 222 |
+
# return jsonify({"error": f"System device {sys} invalid"}), 400
|
| 223 |
+
# pa.terminate()
|
| 224 |
+
|
| 225 |
+
# # reset state
|
| 226 |
+
# recording_status["recording"] = True
|
| 227 |
+
# recording_status["live_segments"] = []
|
| 228 |
+
# rte.stop_event = threading.Event()
|
| 229 |
+
|
| 230 |
+
# def run():
|
| 231 |
+
# try:
|
| 232 |
+
# run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
|
| 233 |
+
# model_name=model, no_transcribe=no_transcribe)
|
| 234 |
+
# finally:
|
| 235 |
+
# recording_status["recording"] = False
|
| 236 |
+
|
| 237 |
+
# recording_thread = threading.Thread(target=run, daemon=True)
|
| 238 |
+
# recording_thread.start()
|
| 239 |
+
# return jsonify({"ok": True})
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
    """Set the helper module's stop event, if one exists, and acknowledge."""
    stop_signal = getattr(rte, "stop_event", None)
    if stop_signal:
        stop_signal.set()
    return jsonify({"ok": True})
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
@app.route("/api/recording-status")
def api_recording_status():
    """Return the recording flag and the segments collected so far as JSON."""
    is_recording = recording_status.get("recording", False)
    segments = recording_status.get("live_segments", [])
    return jsonify({"recording": is_recording, "live_segments": segments})
|
| 255 |
+
|
| 256 |
+
# ---------------- Upload-based APIs ----------------
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
@app.route("/api/upload", methods=["POST"])
def api_upload_file():
    """Store an uploaded audio file and return its URL and stored name.

    Responds with HTTP 400 when the form has no ``file`` part, the filename
    is empty, or the extension is not allowed.
    """
    if 'file' not in request.files:
        return jsonify(success=False, error="No file part"), 400

    upload = request.files['file']
    if upload.filename == '':
        return jsonify(success=False, error="Empty filename"), 400

    safe_name = secure_filename(upload.filename)
    if not allowed_file(safe_name):
        return jsonify(success=False, error="Extension not allowed"), 400

    # A millisecond timestamp prefix keeps repeated uploads from colliding.
    millis = int(time.time() * 1000)
    stored_name = f"{millis}_{safe_name}"
    upload.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))
    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)
|
| 277 |
+
|
| 278 |
+
# ---------------- File serving ----------------
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline from the upload folder."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)
|
| 284 |
+
|
| 285 |
+
# @app.route("/api/start-transcribe-file", methods=["POST"])
|
| 286 |
+
# def api_start_transcribe_file():
|
| 287 |
+
# data = request.json or {}
|
| 288 |
+
# filename = data.get("filename")
|
| 289 |
+
# file_path = OUTPUT_DIR / filename
|
| 290 |
+
# if not file_path.exists():
|
| 291 |
+
# return jsonify({"error": "File not found"}), 404
|
| 292 |
+
|
| 293 |
+
# if recording_status.get("recording"):
|
| 294 |
+
# return jsonify({"error": "Busy"}), 400
|
| 295 |
+
|
| 296 |
+
# def worker():
|
| 297 |
+
# try:
|
| 298 |
+
# recording_status["recording"] = True
|
| 299 |
+
# recording_status["live_segments"] = []
|
| 300 |
+
# transcriber = Transcriber()
|
| 301 |
+
# diar_segments = diarization_hook(str(file_path)) or []
|
| 302 |
+
# segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
|
| 303 |
+
# start_clock = time.time()
|
| 304 |
+
# for seg in segments:
|
| 305 |
+
# wait_for = seg.start - (time.time() - start_clock)
|
| 306 |
+
# if wait_for > 0:
|
| 307 |
+
# time.sleep(wait_for)
|
| 308 |
+
# speaker = "Unknown"
|
| 309 |
+
# for d_start, d_end, d_label in diar_segments:
|
| 310 |
+
# if (seg.start < d_end) and (seg.end > d_start):
|
| 311 |
+
# speaker = d_label
|
| 312 |
+
# break
|
| 313 |
+
|
| 314 |
+
# seg_obj = {
|
| 315 |
+
# "start": float(seg.start),
|
| 316 |
+
# "end": float(seg.end),
|
| 317 |
+
# "speaker": speaker,
|
| 318 |
+
# "text": seg.text.strip()
|
| 319 |
+
# }
|
| 320 |
+
# recording_status["live_segments"].append(seg_obj)
|
| 321 |
+
|
| 322 |
+
# # --- NEW: also append to transcript file so /events SSE can stream it ---
|
| 323 |
+
# line = f"{seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
|
| 324 |
+
# with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
|
| 325 |
+
# tf.write(line)
|
| 326 |
+
|
| 327 |
+
# recording_status["recording"] = False
|
| 328 |
+
# except Exception as e:
|
| 329 |
+
# print("Error in file transcription:", e)
|
| 330 |
+
# recording_status["recording"] = False
|
| 331 |
+
|
| 332 |
+
# threading.Thread(target=worker, daemon=True).start()
|
| 333 |
+
# return jsonify({"ok": True})
|
| 334 |
+
|
| 335 |
+
def find_system_loopback_index():
    """Return the index of a likely loopback / system-audio input device.

    Heuristic: the first input-capable device whose lower-cased name contains
    one of 'loop', 'stereo', 'mix', 'what u hear', 'virtual', 'audio cable',
    'loopback' or 'monitor'. If none matches, the default input device is
    used.

    Returns:
        The device index as an int, or None when PyAudio is unavailable or no
        default input device exists.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        return None

    keywords = ("loop", "stereo", "mix", "what u hear", "virtual",
                "audio cable", "loopback", "monitor")
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be queried is skipped, not fatal.
                continue

        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        # Single release point: runs on every return path and on errors.
        pa.terminate()
|
| 374 |
+
|
| 375 |
+
@app.route("/api/start-transcribe-file", methods=["POST"])
def api_start_transcribe_file():
    """Start a background thread that calls rec_transcribe_extension.run_recording.

    A loopback device is preferred; otherwise the default input device is
    used. The optional ``filename`` in the JSON body is only a sanity check
    that an upload exists; the server does not play the file itself.

    Returns:
        JSON with ``success`` and a message; HTTP 400 when the named upload
        does not exist, HTTP 500 when no input device can be found.
    """
    global recording_thread
    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        body = {}
    filename = body.get('filename')

    # Basic check: uploaded file exists (we don't actually play the file on the server,
    # but it's a sanity check so user didn't start without uploading)
    if filename:
        # Stored names are produced by secure_filename(), so anything that it
        # would alter (path separators, "..", ...) cannot be a real upload and
        # must not be used to probe the filesystem.
        is_plain_name = isinstance(filename, str) and secure_filename(filename) == filename
        if not is_plain_name or not os.path.exists(
                os.path.join(app.config['UPLOAD_FOLDER'], filename)):
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # if there's an active recording, return ok
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # choose device: prefer loopback
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Fresh stop event for this run. Set unconditionally because the
        # patched worker below reads rte.stop_event, and only after a device
        # was found so a failed start does not clear a pending stop.
        rte.stop_event = threading.Event()

        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
            # Replacement for rte.chunk_writer_and_transcribe_worker that
            # writes speaker-labelled lines to the transcript file.
            while True:
                try:
                    chunk_path, frames = in_queue.get(timeout=1.0)
                except queue.Empty:
                    if rte.stop_event.is_set() and in_queue.empty():
                        break
                    continue

                rte.save_wav_from_frames(chunk_path, frames, nchannels=rte.CHANNELS)
                final_frames_list.extend(frames)

                diar_segments = rte.diarization_hook(str(chunk_path)) or []

                if transcriber and transcriber.model:
                    try:
                        segments, info = transcriber.model.transcribe(str(chunk_path), beam_size=5)
                        for seg in segments:
                            seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                            # First diarization turn overlapping the segment wins.
                            speaker = "Unknown"
                            for d_start, d_end, d_speaker in diar_segments:
                                if (seg_start < d_end) and (seg_end > d_start):
                                    speaker = d_speaker
                                    break
                            # Write diarized transcript line
                            line = f"[{pathlib.Path(chunk_path).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                            with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                                tf.write(line)
                    except Exception as e:
                        print(f"Transcription error for {chunk_path}: {e}")

            print("Patched worker exiting.")

        # Start the recording in a background thread
        def target():
            try:
                orig_worker = rte.chunk_writer_and_transcribe_worker
                # Apply patch
                rte.chunk_writer_and_transcribe_worker = patched_worker
                try:
                    rte.run_recording(
                        mic_index=dev_index,
                        sys_index=None,
                        chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
                        model_name=getattr(rte, 'MODEL_NAME', None),
                        no_transcribe=False
                    )
                finally:
                    rte.chunk_writer_and_transcribe_worker = orig_worker
            except Exception as e:
                print("run_recording exception:", e)

        recording_thread = threading.Thread(target=target, daemon=True)
        recording_thread.start()
        return jsonify(success=True, message="Recording started", device_index=dev_index)
|
| 467 |
+
|
| 468 |
+
# @app.route("/static/<path:filename>")
|
| 469 |
+
# def static_files(filename):
|
| 470 |
+
# return send_from_directory(OUTPUT_DIR, filename)
|
| 471 |
+
|
| 472 |
+
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the rec_transcribe_extension recording to stop via its stop_event."""
    with recording_lock:
        stop_signal = getattr(rte, 'stop_event', None)
        if stop_signal is not None:
            # Best effort: a failure to signal is ignored.
            try:
                stop_signal.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file and yield its lines as SSE ``data:`` messages.

    Args:
        path: transcript file to follow; it may not exist yet.
        stop_cond_fn: optional callable; once it returns True the file is
            drained one last time and the stream ends.

    Yields:
        ``"data: <line>\\n\\n"`` for every non-blank line, a single info
        message while the file does not exist yet, and a final
        "Transcription ended" message.
    """
    last_pos = 0
    sent_initial = False
    while True:
        # Evaluate the stop condition first but still read once more below,
        # so lines written just before the stop are not lost.
        stopping = bool(stop_cond_fn and stop_cond_fn())

        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                fh.seek(last_pos)
                lines = fh.readlines()
                last_pos = fh.tell()
        except FileNotFoundError:
            lines = None  # file not created yet

        if lines:
            for ln in lines:
                ln = ln.strip()
                if ln:
                    yield f"data: {ln}\n\n"
            sent_initial = True
        elif not stopping:
            if lines is None:
                if not sent_initial:
                    yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                    sent_initial = True
                time.sleep(0.5)
            else:
                # no new lines
                time.sleep(0.25)

        if stopping:
            break
    # final notification
    yield "data: [info] Transcription ended.\n\n"
|
| 520 |
+
# ---------------- SSE events (from app2) ----------------
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
@app.route("/events")
def events():
    """Stream new lines of rec_transcribe_extension.TRANSCRIPT_FILE as SSE.

    The stream finishes once the module stop_event is set and the background
    recording thread is no longer alive. Responds with HTTP 500 when no
    transcript file is configured.
    """
    configured_path = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured_path:
        return Response("No transcript file configured", status=500)
    transcript_path = str(configured_path)

    def stop_fn():
        # End only when a stop was requested AND the worker thread has finished.
        try:
            stop_signal = getattr(rte, 'stop_event', None)
            stop_requested = stop_signal is not None and stop_signal.is_set()
        except Exception:
            stop_requested = False
        thread = recording_thread
        thread_alive = False if thread is None else thread.is_alive()
        return stop_requested and not thread_alive

    stream = tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)
    return Response(stream_with_context(stream), mimetype="text/event-stream")
|
| 549 |
+
|
| 550 |
+
@app.route("/status")
def status():
    """Report whether the background recording thread is currently alive."""
    thread = recording_thread
    running = bool(thread and thread.is_alive())
    return jsonify(running=running)
|
| 556 |
+
|
| 557 |
+
# ---------------- Run ----------------
|
| 558 |
+
if __name__ == "__main__":
    # The server listens on all interfaces, and the Werkzeug debugger allows
    # arbitrary code execution for anyone who can reach it. Debug mode is
    # therefore opt-in via FLASK_DEBUG=1 instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
rec_transcribe_extension.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import wave
|
| 4 |
+
import queue
|
| 5 |
+
import threading
|
| 6 |
+
import datetime
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
import wave
|
| 9 |
+
import pyaudio
|
| 10 |
+
from pyannote.audio import Pipeline
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
|
| 14 |
+
DIARIZATION_AVAILABLE = True
|
| 15 |
+
except Exception:
|
| 16 |
+
diarization_pipeline = None
|
| 17 |
+
DIARIZATION_AVAILABLE = False
|
| 18 |
+
|
| 19 |
+
# Optional modules (import safely)
|
| 20 |
+
try:
|
| 21 |
+
from faster_whisper import WhisperModel
|
| 22 |
+
FASTER_WHISPER_AVAILABLE = True
|
| 23 |
+
except Exception:
|
| 24 |
+
FASTER_WHISPER_AVAILABLE = False
|
| 25 |
+
|
| 26 |
+
import numpy as np
|
| 27 |
+
|
| 28 |
+
# # Optional: voice activity detection
|
| 29 |
+
# try:
|
| 30 |
+
# import webrtcvad
|
| 31 |
+
# VAD_AVAILABLE = True
|
| 32 |
+
# except Exception:
|
| 33 |
+
# VAD_AVAILABLE = False
|
| 34 |
+
|
| 35 |
+
# ========== CONFIG ==========
# Local time at import; embedded in the output file and directory names below.
RUN_TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
FORMAT = pyaudio.paInt16  # sample format used when opening streams
CHANNELS = 1  # mono
# RATE = 16000
RATE = 44100  # sample rate (Hz) used for capture and for written WAV files
CHUNK = 1024  # frames per buffer read
CHUNK_DURATION_SECS = 5  # how long each saved chunk is (seconds)
OUTPUT_DIR = Path("output_transcript_diarization")
CHUNKS_DIR = OUTPUT_DIR / f"chunks_{RUN_TIMESTAMP}"
FINAL_WAV = OUTPUT_DIR / f"recorded_audio_{RUN_TIMESTAMP}.wav"
TRANSCRIPT_FILE = OUTPUT_DIR / f"transcript_{RUN_TIMESTAMP}.txt"
MODEL_NAME = "medium"  # if using faster-whisper; change as desired
# ============================

# Output directories are created as an import-time side effect.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
CHUNKS_DIR.mkdir(parents=True, exist_ok=True)

# Module-wide PyAudio instance used by open_stream_for_device() and
# save_wav_from_frames().
audio = pyaudio.PyAudio()
|
| 54 |
+
|
| 55 |
+
def list_input_devices():
    '''
    Print every audio device that has input channels (microphones,
    loopbacks, etc.) with its index, name and channel count.
    '''
    pa = pyaudio.PyAudio()
    print("Available audio devices (inputs):")
    for idx in range(pa.get_device_count()):
        info = pa.get_device_info_by_index(idx)
        if info.get("maxInputChannels", 0) <= 0:
            continue
        print(f" {idx}: {info['name']} - {info['maxInputChannels']} chans")
    pa.terminate()
|
| 67 |
+
|
| 68 |
+
def open_stream_for_device(device_index, channels=1):
    '''
    Open and return a PyAudio input stream on the given device, using the
    module FORMAT, RATE and CHUNK settings and the requested channel count.
    '''
    stream_options = {
        "format": FORMAT,
        "channels": channels,
        "rate": RATE,
        "input": True,
        "frames_per_buffer": CHUNK,
        "input_device_index": device_index,
    }
    return audio.open(**stream_options)
|
| 79 |
+
|
| 80 |
+
def save_wav_from_frames(path: Path, frames: list, nchannels=1):
    '''
    Peak-normalise a list of raw 16-bit audio frames and save them as a WAV
    file at the specified path.

    path: destination WAV file.
    frames: list of bytes objects holding int16 samples; may be empty, in
        which case an empty WAV file is written.
    nchannels: channel count recorded in the WAV header.
    '''
    raw = b''.join(frames)
    samples = np.frombuffer(raw, dtype=np.int16)

    # Normalize: scale to 90% of int16 range
    if samples.size:
        # Widen before abs(): abs(int16(-32768)) overflows back to -32768,
        # which would under-report the peak and let the scaling wrap around.
        peak = int(np.abs(samples.astype(np.int32)).max())
        if peak > 0:
            samples = (samples / peak * 32767 * 0.9).astype(np.int16)

    with wave.open(str(path), 'wb') as wf:
        wf.setnchannels(nchannels)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(samples.tobytes())
|
| 98 |
+
|
| 99 |
+
def merge_mono_files_to_stereo(mic_path: Path, sys_path: Path, out_path: Path):
    """
    Write a simple stereo WAV with the mic file on the left channel and the
    system file on the right channel.
    Requires numpy. Very naive: both inputs must share the module sample rate,
    and the output is truncated to the shorter of the two.
    """
    with wave.open(str(mic_path), 'rb') as wm, wave.open(str(sys_path), 'rb') as ws:
        assert wm.getframerate() == ws.getframerate() == RATE
        sampwidth = wm.getsampwidth()
        nframes = min(wm.getnframes(), ws.getnframes())
        left = np.frombuffer(wm.readframes(nframes), dtype=np.int16)
        right = np.frombuffer(ws.readframes(nframes), dtype=np.int16)

    # One row per frame, columns = (left, right); row-major bytes are the
    # interleaved stereo stream.
    stereo = np.empty((nframes, 2), dtype=np.int16)
    stereo[:, 0] = left[:nframes]
    stereo[:, 1] = right[:nframes]

    with wave.open(str(out_path), 'wb') as wf:
        wf.setnchannels(2)
        wf.setsampwidth(sampwidth)
        wf.setframerate(RATE)
        wf.writeframes(stereo.tobytes())
|
| 125 |
+
|
| 126 |
+
class Transcriber:
    def __init__(self):
        '''
        Try to load the faster-whisper model; self.model stays None when the
        library is unavailable or loading fails.
        '''
        self.model = None
        if not FASTER_WHISPER_AVAILABLE:
            print("faster-whisper not available. Transcription will be disabled.")
            return

        print("Loading faster-whisper model. This may take some time...")
        # Use the GPU when torch reports CUDA, otherwise fall back to CPU.
        try:
            import torch
            device = "cuda" if torch.cuda.is_available() else "cpu"
        except Exception:
            device = "cpu"

        # float16 on GPU, float32 on CPU
        if device == "cuda":
            compute_type = "float16"
        else:
            compute_type = "float32"

        try:
            # instantiate model (may download weights on first run)
            self.model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
            print(f"Model loaded on {device} (compute_type={compute_type}).")
        except Exception as e:
            print("Failed to load faster-whisper model:", e)
            print("Continuing without transcription.")
            self.model = None

    def transcribe_file(self, wav_path: str):
        '''
        Transcribe a WAV file and return the segment texts joined by spaces,
        or None when no model is loaded or transcription fails.
        '''
        if not self.model:
            return None
        try:
            segments, _info = self.model.transcribe(wav_path, beam_size=5)
            return " ".join(seg.text.strip() for seg in segments)
        except Exception as e:
            print(f"Transcription error for {wav_path}: {e}")
            return None
|
| 168 |
+
|
| 169 |
+
def diarization_hook(audio_path: str):
    """
    Run the diarization pipeline on a file and return a list of
    (start, end, speaker) tuples, or None when diarization is unavailable.
    """
    if not DIARIZATION_AVAILABLE:
        return None
    annotation = diarization_pipeline(audio_path)
    return [
        (turn.start, turn.end, speaker)
        for turn, _, speaker in annotation.itertracks(yield_label=True)
    ]
|
| 180 |
+
|
| 181 |
+
# Recorder threads
|
| 182 |
+
def record_loop(device_index, out_queue, label="mic"):
    """
    Read audio from an input device and queue it in fixed-length chunks.

    Opens a stream for ``device_index`` and repeatedly reads ``CHUNK`` frames.
    Once enough reads have accumulated to cover ``CHUNK_DURATION_SECS``, a
    ``(filename, frames)`` tuple is put on ``out_queue``, where ``filename``
    is a timestamped path under ``CHUNKS_DIR`` prefixed with ``label``.

    The loop ends when the module-level ``stop_event`` is set (if one
    exists), after more than 10 consecutive read errors, or on
    KeyboardInterrupt. The stream is always stopped and closed on exit.
    Returns None; if the stream cannot be opened the function returns
    immediately after printing the error.
    """
    try:
        stream = open_stream_for_device(device_index, channels=CHANNELS)
    except Exception as e:
        print(f"Could not open stream for device {device_index} ({label}): {e}")
        return

    def _stop_requested():
        # run_recording() creates stop_event lazily, so it may not exist when
        # this function is used on its own; treat "missing" as "keep going".
        flag = globals().get("stop_event")
        return flag is not None and flag.is_set()

    frames_per_chunk = int(RATE / CHUNK * CHUNK_DURATION_SECS)
    frames = []
    print(f"Recording from device {device_index} ({label}) ... Press Ctrl+C to stop.")
    error_count = 0
    try:
        # Honour the shared stop flag so the recorder stops producing chunks
        # (and releases the device) as soon as a stop is requested.
        while not _stop_requested():
            try:
                data = stream.read(CHUNK, exception_on_overflow=False)
                error_count = 0  # reset on success
            except Exception as e:
                print(f"Read error on device {device_index} ({label}): {e}")
                error_count += 1
                if error_count > 10:
                    print(f"Too many errors on device {device_index} ({label}). Stopping this thread.")
                    break
                continue
            frames.append(data)
            if len(frames) >= frames_per_chunk:
                # Same UTC timestamp format as before, without the deprecated
                # datetime.utcnow().
                ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
                filename = CHUNKS_DIR / f"{label}_{ts}.wav"
                # Hand the list over and start a fresh one; no copy is needed
                # because this function never touches the old list again.
                out_queue.put((filename, frames))
                frames = []
    except KeyboardInterrupt:
        print(f"Recording thread {label} received KeyboardInterrupt.")
    finally:
        try:
            stream.stop_stream()
            stream.close()
        except Exception as e:
            # Best-effort cleanup: report instead of silently swallowing.
            print(f"Error while closing stream for {label}: {e}")
        print(f"Recording thread for {label} exited.")
|
| 222 |
+
|
| 223 |
+
def chunk_writer_and_transcribe_worker(in_queue: queue.Queue, final_frames_list: list, transcriber: Transcriber, single_channel_label="mic"):
    """
    Save queued chunks as WAV files and optionally transcribe them.

    Pulls ``(filename, frames)`` items from ``in_queue`` until the global
    ``stop_event`` is set and the queue is empty. For every item:

    * the frames are appended to ``final_frames_list`` (used by the caller
      to build the final concatenated WAV);
    * the chunk is written with ``save_wav_from_frames``;
    * if ``transcriber`` has a loaded model, the chunk is diarized and
      transcribed, and one line per segment is printed and appended to
      ``TRANSCRIPT_FILE``.

    A failure while saving, diarizing or transcribing one chunk is reported
    and the worker moves on to the next chunk instead of terminating.
    ``single_channel_label`` is accepted for interface compatibility and is
    not used in the body.
    """
    while True:
        try:
            filename, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            if stop_event.is_set() and in_queue.empty():
                break
            continue

        # Keep the audio for the final WAV even if writing this chunk fails.
        final_frames_list.extend(frames)
        try:
            save_wav_from_frames(filename, frames, nchannels=CHANNELS)
        except Exception as e:
            print(f"Could not save chunk {filename.name}: {e}")
            continue
        print(f"Saved chunk: {filename.name}")

        # Diarization output is only consumed by the transcription step, so
        # skip both when transcription is disabled for this worker.
        if not (transcriber and transcriber.model):
            continue

        try:
            # list of (start, end, speaker); None/empty means "no speakers"
            diar_segments = diarization_hook(str(filename)) or []
        except Exception as e:
            print(f"Diarization error for {filename.name}: {e}")
            diar_segments = []

        try:
            segments, _info = transcriber.model.transcribe(str(filename), beam_size=5)
            for seg in segments:
                seg_start = seg.start
                seg_end = seg.end
                seg_text = seg.text.strip()
                # First diarization turn overlapping this segment wins.
                speaker = next(
                    (
                        d_speaker
                        for d_start, d_end, d_speaker in diar_segments
                        if seg_start < d_end and seg_end > d_start
                    ),
                    "Unknown",
                )
                line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                print(line.strip())
                # Append per segment so the transcript file grows as segments
                # become available.
                with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                    tf.write(line)
        except Exception as e:
            print(f"Transcription error for {filename.name}: {e}")
    print("Chunk writer/transcriber worker exiting.")
|
| 269 |
+
|
| 270 |
+
def run_recording(mic_index, sys_index=None, chunk_secs=CHUNK_DURATION_SECS, model_name=MODEL_NAME, no_transcribe=False):
    """
    Record from the microphone (and optionally a system-audio device) until
    the shared ``stop_event`` is set, then write the final WAV files.

    ``chunk_secs`` and ``model_name`` overwrite the module-level
    ``CHUNK_DURATION_SECS`` and ``MODEL_NAME``. Only the microphone writer
    receives a transcriber (none at all when ``no_transcribe`` is true); the
    system-audio writer just saves chunks. After the writers are joined, the
    microphone audio is saved to ``FINAL_WAV``; if system audio was captured
    too, it is saved separately and both are merged into a stereo file.
    Finally the global ``audio`` object is terminated.
    """
    global CHUNK_DURATION_SECS, MODEL_NAME, stop_event
    CHUNK_DURATION_SECS, MODEL_NAME = chunk_secs, model_name

    # Start transcriber if enabled
    transcriber = Transcriber() if not no_transcribe else None

    # Queues and threads
    mic_queue = queue.Queue()
    mic_frames = []
    if globals().get('stop_event') is None:
        stop_event = threading.Event()

    mic_recorder = threading.Thread(
        target=record_loop, args=(mic_index, mic_queue, "mic"), daemon=True
    )
    mic_recorder.start()

    sys_queue = None
    sys_writer = None
    sys_frames = []

    if sys_index is not None:
        sys_queue = queue.Queue()
        sys_recorder = threading.Thread(
            target=record_loop, args=(sys_index, sys_queue, "sys"), daemon=True
        )
        sys_recorder.start()
        sys_writer = threading.Thread(
            target=chunk_writer_and_transcribe_worker,
            args=(sys_queue, sys_frames, None, "sys"),
            daemon=True,
        )
        sys_writer.start()

    mic_writer = threading.Thread(
        target=chunk_writer_and_transcribe_worker,
        args=(mic_queue, mic_frames, transcriber, "mic"),
        daemon=True,
    )
    mic_writer.start()

    try:
        # wait until the shared stop_event is set by the caller (Flask / api_stop-recording)
        while not stop_event.is_set():
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("\nStopping all threads.")
        stop_event.set()
    time.sleep(1.0)

    for writer in (mic_writer, sys_writer):
        if writer:
            writer.join(timeout=5)

    if mic_frames:
        save_wav_from_frames(FINAL_WAV, mic_frames, nchannels=CHANNELS)
        print(f"Saved final WAV: {FINAL_WAV}")

        # The stereo merge needs both tracks, so system audio is only written
        # when microphone audio exists as well.
        if sys_frames:
            final_sys_wav = OUTPUT_DIR / "recorded_system_full.wav"
            save_wav_from_frames(final_sys_wav, sys_frames, nchannels=CHANNELS)
            stereo_path = OUTPUT_DIR / "recorded_audio_stereo.wav"
            merge_mono_files_to_stereo(FINAL_WAV, final_sys_wav, stereo_path)
            print(f"Saved merged stereo WAV: {stereo_path}")

    audio.terminate()
    print("Done. Transcript (if any) saved to:", TRANSCRIPT_FILE)
|
| 337 |
+
# Main
|
| 338 |
+
if __name__ == "__main__":
    # Interactive entry point: list devices, ask for a microphone index and
    # record from it until stopped.
    list_input_devices()
    choice = input("\nEnter the device index for your microphone (or press ENTER to use default): ").strip()
    if choice:
        try:
            mic_index = int(choice)
        except ValueError:
            # Exit with a readable message instead of a raw traceback.
            raise SystemExit(f"Invalid device index {choice!r}: expected an integer.") from None
    else:
        # The temporary PyAudio instance is only needed for this lookup, so
        # release it as soon as the default index is known.
        probe = pyaudio.PyAudio()
        try:
            mic_index = probe.get_default_input_device_info()['index']
        finally:
            probe.terminate()
    run_recording(mic_index)
|
requirements.txt
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohappyeyeballs
|
| 2 |
+
aiohttp
|
| 3 |
+
aioice
|
| 4 |
+
aiortc
|
| 5 |
+
aiosignal
|
| 6 |
+
alembic
|
| 7 |
+
antlr4-python3-runtime
|
| 8 |
+
asteroid-filterbanks
|
| 9 |
+
asttokens
|
| 10 |
+
attrs
|
| 11 |
+
audioread
|
| 12 |
+
av
|
| 13 |
+
bidict
|
| 14 |
+
blinker
|
| 15 |
+
certifi
|
| 16 |
+
cffi
|
| 17 |
+
charset-normalizer
|
| 18 |
+
click
|
| 19 |
+
colorama
|
| 20 |
+
coloredlogs
|
| 21 |
+
colorlog
|
| 22 |
+
comm
|
| 23 |
+
contourpy
|
| 24 |
+
cryptography
|
| 25 |
+
ctranslate2
|
| 26 |
+
cycler
|
| 27 |
+
debugpy
|
| 28 |
+
decorator
|
| 29 |
+
dnspython
|
| 30 |
+
docopt
|
| 31 |
+
einops
|
| 32 |
+
eventlet
|
| 33 |
+
executing
|
| 34 |
+
faster-whisper
|
| 35 |
+
filelock
|
| 36 |
+
Flask
|
| 37 |
+
Flask-SocketIO
|
| 38 |
+
flatbuffers
|
| 39 |
+
fonttools
|
| 40 |
+
frozenlist
|
| 41 |
+
fsspec
|
| 42 |
+
google-crc32c
|
| 43 |
+
greenlet
|
| 44 |
+
h11
|
| 45 |
+
huggingface-hub
|
| 46 |
+
humanfriendly
|
| 47 |
+
HyperPyYAML
|
| 48 |
+
idna
|
| 49 |
+
ifaddr
|
| 50 |
+
ipykernel
|
| 51 |
+
ipython
|
| 52 |
+
ipython_pygments_lexers
|
| 53 |
+
itsdangerous
|
| 54 |
+
jedi
|
| 55 |
+
Jinja2
|
| 56 |
+
joblib
|
| 57 |
+
julius
|
| 58 |
+
jupyter_client
|
| 59 |
+
jupyter_core
|
| 60 |
+
kiwisolver
|
| 61 |
+
lazy_loader
|
| 62 |
+
librosa
|
| 63 |
+
lightning
|
| 64 |
+
lightning-utilities
|
| 65 |
+
llvmlite
|
| 66 |
+
Mako
|
| 67 |
+
markdown-it-py
|
| 68 |
+
MarkupSafe
|
| 69 |
+
matplotlib
|
| 70 |
+
matplotlib-inline
|
| 71 |
+
mdurl
|
| 72 |
+
more-itertools
|
| 73 |
+
mpmath
|
| 74 |
+
msgpack
|
| 75 |
+
multidict
|
| 76 |
+
mypy_extensions
|
| 77 |
+
nest-asyncio
|
| 78 |
+
networkx
|
| 79 |
+
numba
|
| 80 |
+
numpy
|
| 81 |
+
omegaconf
|
| 82 |
+
onnxruntime
|
| 83 |
+
openai-whisper
|
| 84 |
+
optuna
|
| 85 |
+
packaging
|
| 86 |
+
pandas
|
| 87 |
+
parso
|
| 88 |
+
pillow
|
| 89 |
+
platformdirs
|
| 90 |
+
pooch
|
| 91 |
+
primePy
|
| 92 |
+
prompt_toolkit
|
| 93 |
+
propcache
|
| 94 |
+
protobuf
|
| 95 |
+
psutil
|
| 96 |
+
pure_eval
|
| 97 |
+
pyannotate
|
| 98 |
+
pyannote.audio
|
| 99 |
+
pyannote.core
|
| 100 |
+
pyannote.database
|
| 101 |
+
pyannote.metrics
|
| 102 |
+
pyannote.pipeline
|
| 103 |
+
PyAudio
|
| 104 |
+
pycparser
|
| 105 |
+
pydub
|
| 106 |
+
pyee
|
| 107 |
+
Pygments
|
| 108 |
+
pylibsrtp
|
| 109 |
+
pyOpenSSL
|
| 110 |
+
pyparsing
|
| 111 |
+
pyreadline3
|
| 112 |
+
python-dateutil
|
| 113 |
+
python-engineio
|
| 114 |
+
python-socketio
|
| 115 |
+
pytorch-lightning
|
| 116 |
+
pytorch-metric-learning
|
| 117 |
+
pytz
|
| 118 |
+
pywin32; sys_platform == "win32"
|
| 119 |
+
PyYAML
|
| 120 |
+
pyzmq
|
| 121 |
+
regex
|
| 122 |
+
requests
|
| 123 |
+
resampy
|
| 124 |
+
Resemblyzer
|
| 125 |
+
rich
|
| 126 |
+
ruamel.yaml
|
| 127 |
+
ruamel.yaml.clib
|
| 128 |
+
safetensors
|
| 129 |
+
scikit-learn
|
| 130 |
+
scipy
|
| 131 |
+
semver
|
| 132 |
+
sentencepiece
|
| 133 |
+
setuptools
|
| 134 |
+
shellingham
|
| 135 |
+
simple-websocket
|
| 136 |
+
six
|
| 137 |
+
sortedcontainers
|
| 138 |
+
SoundCard
|
| 139 |
+
sounddevice
|
| 140 |
+
soundfile
|
| 141 |
+
soxr
|
| 142 |
+
speechbrain
|
| 143 |
+
SQLAlchemy
|
| 144 |
+
stack-data
|
| 145 |
+
sympy
|
| 146 |
+
tabulate
|
| 147 |
+
tensorboardX
|
| 148 |
+
threadpoolctl
|
| 149 |
+
tiktoken
|
| 150 |
+
tokenizers
|
| 151 |
+
torch
|
| 152 |
+
torch-audiomentations
|
| 153 |
+
torch_pitch_shift
|
| 154 |
+
torchaudio
|
| 155 |
+
torchmetrics
|
| 156 |
+
tornado
|
| 157 |
+
tqdm
|
| 158 |
+
traitlets
|
| 159 |
+
transformers
|
| 160 |
+
typer
|
| 161 |
+
typing
|
| 162 |
+
typing_extensions
|
| 163 |
+
tzdata
|
| 164 |
+
urllib3
|
| 165 |
+
wavio
|
| 166 |
+
wcwidth
|
| 167 |
+
webrtcvad
|
| 168 |
+
websocket-client
|
| 169 |
+
Werkzeug
|
| 170 |
+
wsproto
|
| 171 |
+
yarl
|
static/icon_upload.png
ADDED
|
|
templates/index2.html
ADDED
|
@@ -0,0 +1,753 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8" />
|
| 6 |
+
<title>Audio Transcription Studio</title>
|
| 7 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 8 |
+
<!-- <link rel="icon" href="https://lovable.dev/favicon.ico"> -->
|
| 9 |
+
<!-- <link rel="icon" href="https://cdn-icons-png.flaticon.com/512/727/727245.png?v=2"> -->
|
| 10 |
+
<link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
|
| 11 |
+
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
|
| 12 |
+
<style>
|
| 13 |
+
:root {
|
| 14 |
+
--bg: #18122b;
|
| 15 |
+
--bg-card: #231942;
|
| 16 |
+
--bg-card2: #251e3e;
|
| 17 |
+
--accent: #a259ec;
|
| 18 |
+
--accent2: #2563eb;
|
| 19 |
+
--text: #fff;
|
| 20 |
+
--text-muted: #bcbcbc;
|
| 21 |
+
--border: #312e4a;
|
| 22 |
+
--success: #22c55e;
|
| 23 |
+
--danger: #dc2626;
|
| 24 |
+
--cyan: #00fff7;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
html,
|
| 28 |
+
body {
|
| 29 |
+
height: 100%;
|
| 30 |
+
margin: 0;
|
| 31 |
+
padding: 0;
|
| 32 |
+
font-family: 'Inter', Arial, sans-serif;
|
| 33 |
+
background: var(--bg);
|
| 34 |
+
color: var(--text);
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.layout {
|
| 38 |
+
display: flex;
|
| 39 |
+
min-height: 100vh;
|
| 40 |
+
gap: 32px;
|
| 41 |
+
padding: 32px;
|
| 42 |
+
box-sizing: border-box;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.main-panel {
|
| 46 |
+
flex: 2;
|
| 47 |
+
display: flex;
|
| 48 |
+
flex-direction: column;
|
| 49 |
+
gap: 24px;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.card {
|
| 53 |
+
background: var(--bg-card);
|
| 54 |
+
border-radius: 18px;
|
| 55 |
+
box-shadow: 0 2px 16px #0003;
|
| 56 |
+
padding: 32px 32px 24px 32px;
|
| 57 |
+
margin-bottom: 0;
|
| 58 |
+
border: 1.5px solid var(--border);
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
.card h2,
|
| 62 |
+
.card h3 {
|
| 63 |
+
margin-top: 0;
|
| 64 |
+
color: var(--accent);
|
| 65 |
+
font-size: 1.5em;
|
| 66 |
+
font-weight: 700;
|
| 67 |
+
margin-bottom: 18px;
|
| 68 |
+
letter-spacing: 1px;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.sidebar {
|
| 72 |
+
flex: 1;
|
| 73 |
+
min-width: 320px;
|
| 74 |
+
background: var(--bg-card2);
|
| 75 |
+
border-radius: 18px;
|
| 76 |
+
box-shadow: 0 2px 16px #0003;
|
| 77 |
+
padding: 32px 28px 24px 28px;
|
| 78 |
+
display: flex;
|
| 79 |
+
flex-direction: column;
|
| 80 |
+
gap: 32px;
|
| 81 |
+
border: 1.5px solid var(--border);
|
| 82 |
+
height: fit-content;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.sidebar h3 {
|
| 86 |
+
color: var(--accent2);
|
| 87 |
+
font-size: 1.2em;
|
| 88 |
+
font-weight: 700;
|
| 89 |
+
margin-bottom: 18px;
|
| 90 |
+
letter-spacing: 1px;
|
| 91 |
+
display: flex;
|
| 92 |
+
align-items: center;
|
| 93 |
+
gap: 8px;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.sidebar label {
|
| 97 |
+
font-size: 1em;
|
| 98 |
+
color: var(--text-muted);
|
| 99 |
+
margin-top: 18px;
|
| 100 |
+
font-weight: 600;
|
| 101 |
+
display: flex;
|
| 102 |
+
align-items: center;
|
| 103 |
+
gap: 8px;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.sidebar select,
|
| 107 |
+
.sidebar input[type="number"] {
|
| 108 |
+
width: 100%;
|
| 109 |
+
margin-top: 6px;
|
| 110 |
+
padding: 10px;
|
| 111 |
+
border-radius: 8px;
|
| 112 |
+
border: 1px solid var(--border);
|
| 113 |
+
background: #201c3a;
|
| 114 |
+
color: var(--text);
|
| 115 |
+
font-size: 1em;
|
| 116 |
+
margin-bottom: 10px;
|
| 117 |
+
outline: none;
|
| 118 |
+
transition: border 0.2s;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.sidebar select:focus,
|
| 122 |
+
.sidebar input[type="number"]:focus {
|
| 123 |
+
border: 1.5px solid var(--accent2);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
.sidebar button {
|
| 127 |
+
width: 100%;
|
| 128 |
+
padding: 14px 0;
|
| 129 |
+
margin-top: 18px;
|
| 130 |
+
border: none;
|
| 131 |
+
border-radius: 8px;
|
| 132 |
+
background: var(--accent);
|
| 133 |
+
color: #fff;
|
| 134 |
+
font-size: 1.1em;
|
| 135 |
+
font-weight: 600;
|
| 136 |
+
cursor: pointer;
|
| 137 |
+
transition: background 0.2s;
|
| 138 |
+
box-shadow: 0 2px 8px #0002;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.sidebar button:disabled {
|
| 142 |
+
background: #a5b4fc;
|
| 143 |
+
cursor: not-allowed;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.sidebar .stop-btn {
|
| 147 |
+
background: var(--danger);
|
| 148 |
+
margin-top: 8px;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
.toggle-row {
|
| 152 |
+
display: flex;
|
| 153 |
+
align-items: center;
|
| 154 |
+
gap: 10px;
|
| 155 |
+
margin-top: 10px;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
.toggle-label {
|
| 159 |
+
flex: 1;
|
| 160 |
+
color: var(--text-muted);
|
| 161 |
+
font-size: 1em;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.toggle-switch {
|
| 165 |
+
width: 38px;
|
| 166 |
+
height: 22px;
|
| 167 |
+
background: #333;
|
| 168 |
+
border-radius: 12px;
|
| 169 |
+
position: relative;
|
| 170 |
+
cursor: pointer;
|
| 171 |
+
transition: background 0.2s;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.toggle-switch input {
|
| 175 |
+
display: none;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.toggle-slider {
|
| 179 |
+
position: absolute;
|
| 180 |
+
top: 2px;
|
| 181 |
+
left: 2px;
|
| 182 |
+
width: 18px;
|
| 183 |
+
height: 18px;
|
| 184 |
+
background: var(--accent2);
|
| 185 |
+
border-radius: 50%;
|
| 186 |
+
transition: left 0.2s;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.toggle-switch input:checked+.toggle-slider {
|
| 190 |
+
left: 18px;
|
| 191 |
+
background: var(--danger);
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.status {
|
| 195 |
+
margin: 18px 0 0 0;
|
| 196 |
+
font-weight: bold;
|
| 197 |
+
color: var(--success);
|
| 198 |
+
font-size: 1.1em;
|
| 199 |
+
text-align: center;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.recorder-center {
|
| 203 |
+
display: flex;
|
| 204 |
+
flex-direction: column;
|
| 205 |
+
align-items: center;
|
| 206 |
+
gap: 18px;
|
| 207 |
+
margin-bottom: 18px;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
.recorder-btn {
|
| 211 |
+
width: 90px;
|
| 212 |
+
height: 90px;
|
| 213 |
+
border-radius: 50%;
|
| 214 |
+
background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
|
| 215 |
+
display: flex;
|
| 216 |
+
align-items: center;
|
| 217 |
+
justify-content: center;
|
| 218 |
+
box-shadow: 0 0 32px #a259ec55;
|
| 219 |
+
cursor: pointer;
|
| 220 |
+
transition: box-shadow 0.2s, background 0.2s;
|
| 221 |
+
position: relative;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
.recorder-btn.recording {
|
| 225 |
+
background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
|
| 226 |
+
box-shadow: 0 0 32px #dc262655;
|
| 227 |
+
animation: pulse 1.2s infinite;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
@keyframes pulse {
|
| 231 |
+
0% {
|
| 232 |
+
box-shadow: 0 0 32px #dc262655;
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
50% {
|
| 236 |
+
box-shadow: 0 0 48px #dc2626aa;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
100% {
|
| 240 |
+
box-shadow: 0 0 32px #dc262655;
|
| 241 |
+
}
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
.recorder-btn svg {
|
| 245 |
+
width: 38px;
|
| 246 |
+
height: 38px;
|
| 247 |
+
color: #fff;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.recorder-status {
|
| 251 |
+
color: var(--success);
|
| 252 |
+
font-size: 1.1em;
|
| 253 |
+
font-weight: 600;
|
| 254 |
+
margin-top: 8px;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
.recorder-status.recording {
|
| 258 |
+
color: var(--danger);
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
.live {
|
| 262 |
+
margin-top: 0;
|
| 263 |
+
background: #201c3a;
|
| 264 |
+
border-radius: 12px;
|
| 265 |
+
padding: 18px 18px 10px 18px;
|
| 266 |
+
min-height: 90px;
|
| 267 |
+
border: 1px solid var(--border);
|
| 268 |
+
overflow: hidden;
|
| 269 |
+
/* hide outer overflow, inner #live will scroll */
|
| 270 |
+
display: flex;
|
| 271 |
+
flex-direction: column;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
/* inner container which actually scrolls */
|
| 275 |
+
#live {
|
| 276 |
+
flex: 1 1 auto;
|
| 277 |
+
overflow-y: auto;
|
| 278 |
+
padding-right: 6px;
|
| 279 |
+
/* give room for scroll bar */
|
| 280 |
+
-webkit-overflow-scrolling: touch;
|
| 281 |
+
scroll-behavior: smooth;
|
| 282 |
+
color: var(--text-muted);
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
.live h4 {
|
| 286 |
+
margin: 0 0 10px 0;
|
| 287 |
+
color: var(--cyan);
|
| 288 |
+
font-size: 1.08em;
|
| 289 |
+
font-weight: 600;
|
| 290 |
+
display: flex;
|
| 291 |
+
align-items: center;
|
| 292 |
+
gap: 8px;
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
.chunk {
|
| 296 |
+
background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
|
| 297 |
+
margin-bottom: 8px;
|
| 298 |
+
padding: 10px 12px;
|
| 299 |
+
border-radius: 8px;
|
| 300 |
+
font-size: 0.98em;
|
| 301 |
+
color: var(--text);
|
| 302 |
+
box-shadow: 0 1px 2px #0002;
|
| 303 |
+
border: 1px solid rgba(255, 255, 255, 0.02);
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/* Small speaker label */
|
| 307 |
+
.chunk b {
|
| 308 |
+
color: var(--cyan);
|
| 309 |
+
margin-right: 6px;
|
| 310 |
+
font-weight: 700;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
/* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
|
| 314 |
+
#live::-webkit-scrollbar {
|
| 315 |
+
width: 10px;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
#live::-webkit-scrollbar-track {
|
| 319 |
+
background: rgba(255, 255, 255, 0.02);
|
| 320 |
+
border-radius: 10px;
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
#live::-webkit-scrollbar-thumb {
|
| 324 |
+
background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
|
| 325 |
+
border-radius: 10px;
|
| 326 |
+
border: 2px solid rgba(0, 0, 0, 0.15);
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
#live::-webkit-scrollbar-thumb:hover {
|
| 330 |
+
filter: brightness(0.95);
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
/* THEMED SCROLLBAR - Firefox */
|
| 334 |
+
#live {
|
| 335 |
+
scrollbar-width: thin;
|
| 336 |
+
scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
/* responsive: reduce max-height on small screens */
|
| 340 |
+
@media (max-width: 700px) {
|
| 341 |
+
.live {
|
| 342 |
+
max-height: 200px;
|
| 343 |
+
}
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
.files h4 {
|
| 347 |
+
color: var(--accent2);
|
| 348 |
+
font-size: 1.08em;
|
| 349 |
+
margin: 0 0 10px 0;
|
| 350 |
+
font-weight: 600;
|
| 351 |
+
display: flex;
|
| 352 |
+
align-items: center;
|
| 353 |
+
gap: 8px;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
.file {
|
| 357 |
+
background: #2d254a;
|
| 358 |
+
margin-bottom: 8px;
|
| 359 |
+
padding: 8px 12px;
|
| 360 |
+
border-radius: 5px;
|
| 361 |
+
font-size: 1em;
|
| 362 |
+
color: #e0e7ef;
|
| 363 |
+
display: flex;
|
| 364 |
+
align-items: center;
|
| 365 |
+
justify-content: space-between;
|
| 366 |
+
box-shadow: 0 1px 2px #0001;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
.file a {
|
| 370 |
+
color: var(--accent2);
|
| 371 |
+
text-decoration: none;
|
| 372 |
+
font-weight: 500;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
.file a:hover {
|
| 376 |
+
text-decoration: underline;
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
#audio-player-container {
|
| 380 |
+
margin-bottom: 18px;
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
#waveform {
|
| 384 |
+
width: 100%;
|
| 385 |
+
height: 80px;
|
| 386 |
+
background: #2d254a;
|
| 387 |
+
border-radius: 6px;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
#transcript-container {
|
| 391 |
+
background: #2d254a;
|
| 392 |
+
padding: 14px;
|
| 393 |
+
border-radius: 6px;
|
| 394 |
+
margin-top: 24px;
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
#transcript-content {
|
| 398 |
+
margin-top: 10px;
|
| 399 |
+
white-space: pre-wrap;
|
| 400 |
+
font-size: 1em;
|
| 401 |
+
color: #e0e7ef;
|
| 402 |
+
max-height: 300px;
|
| 403 |
+
overflow: auto;
|
| 404 |
+
background: #201c3a;
|
| 405 |
+
padding: 10px;
|
| 406 |
+
border-radius: 4px;
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
@media (max-width: 1100px) {
|
| 410 |
+
.layout {
|
| 411 |
+
flex-direction: column;
|
| 412 |
+
gap: 0;
|
| 413 |
+
padding: 12px;
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
.sidebar {
|
| 417 |
+
min-width: unset;
|
| 418 |
+
width: 100%;
|
| 419 |
+
margin-bottom: 18px;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.main-panel {
|
| 423 |
+
padding: 0;
|
| 424 |
+
}
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
@media (max-width: 700px) {
|
| 428 |
+
|
| 429 |
+
.card,
|
| 430 |
+
.sidebar {
|
| 431 |
+
padding: 16px 8px 12px 8px;
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
.main-panel {
|
| 435 |
+
gap: 12px;
|
| 436 |
+
}
|
| 437 |
+
}
|
| 438 |
+
</style>
|
| 439 |
+
</head>
|
| 440 |
+
|
| 441 |
+
<body>
|
| 442 |
+
<div class="layout">
|
| 443 |
+
<main class="main-panel">
|
| 444 |
+
<section class="card">
|
| 445 |
+
<h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio</h2>
|
| 446 |
+
<div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
|
| 447 |
+
Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
|
| 448 |
+
</div>
|
| 449 |
+
<div class="recorder-center">
|
| 450 |
+
<div id="recorderBtn" class="recorder-btn" title="Start/Stop Recording">
|
| 451 |
+
<svg id="micIcon" xmlns="http://www.w3.org/2000/svg" width="38" height="38" viewBox="0 0 24 24" fill="none"
|
| 452 |
+
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
|
| 453 |
+
class="lucide lucide-mic-icon lucide-mic">
|
| 454 |
+
<path d="M12 19v3" />
|
| 455 |
+
<path d="M19 10v2a7 7 0 0 1-14 0v-2" />
|
| 456 |
+
<rect x="9" y="2" width="6" height="13" rx="3" />
|
| 457 |
+
</svg>
|
| 458 |
+
<svg id="stopIcon" style="display:none;" xmlns="http://www.w3.org/2000/svg" fill="currentColor"
|
| 459 |
+
viewBox="0 0 24 24">
|
| 460 |
+
<rect x="6" y="6" width="12" height="12" rx="2" />
|
| 461 |
+
</svg>
|
| 462 |
+
</div>
|
| 463 |
+
<div id="recorderStatus" class="recorder-status">Ready to record</div>
|
| 464 |
+
</div>
|
| 465 |
+
</section>
|
| 466 |
+
|
| 467 |
+
<section class="card">
|
| 468 |
+
<h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
|
| 469 |
+
<div class="live">
|
| 470 |
+
<div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription</div>
|
| 471 |
+
</div>
|
| 472 |
+
</section>
|
| 473 |
+
<!-- <section class="card files">
|
| 474 |
+
<h4><span style="color:var(--accent2);">📁</span> Recording Files</h4>
|
| 475 |
+
<div id="audio-player-container"></div>
|
| 476 |
+
<div id="transcript-container"></div>
|
| 477 |
+
<div id="files"></div>
|
| 478 |
+
</section> -->
|
| 479 |
+
</main>
|
| 480 |
+
<aside class="sidebar">
|
| 481 |
+
<h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
|
| 482 |
+
<label for="mic">Microphone Device</label>
|
| 483 |
+
<select id="mic" disabled>
|
| 484 |
+
<option value="1" selected>Default Microphone (#1)</option>
|
| 485 |
+
</select>
|
| 486 |
+
<label for="sys">System Audio (Optional)</label>
|
| 487 |
+
<select id="sys" disabled>
|
| 488 |
+
<option value="16" selected>System Loopback (#16)</option>
|
| 489 |
+
</select>
|
| 490 |
+
<label for="chunk_secs">Chunk Length (seconds)</label>
|
| 491 |
+
<input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
|
| 492 |
+
<label for="model">Transcription Model</label>
|
| 493 |
+
<select id="model" disabled>
|
| 494 |
+
<option value="small">Small (Fast)</option>
|
| 495 |
+
<option value="medium" selected>Medium (Balanced)</option>
|
| 496 |
+
<option value="large">Large (Accurate)</option>
|
| 497 |
+
</select>
|
| 498 |
+
<div class="toggle-row">
|
| 499 |
+
<span class="toggle-label">Disable Transcription</span>
|
| 500 |
+
<label class="toggle-switch">
|
| 501 |
+
<input type="checkbox" id="no_transcribe">
|
| 502 |
+
<span class="toggle-slider"></span>
|
| 503 |
+
</label>
|
| 504 |
+
</div>
|
| 505 |
+
<div class="status" id="status"></div>
|
| 506 |
+
</aside>
|
| 507 |
+
</div>
|
| 508 |
+
<script>
|
| 509 |
+
// --- Recording button: DOM handles and shared state ---
// Elements that make up the round record/stop control.
const recorderBtn = document.getElementById('recorderBtn');
const recorderStatus = document.getElementById('recorderStatus');
const micIcon = document.getElementById('micIcon');
const stopIcon = document.getElementById('stopIcon');
// Alias of the big round button.
const startBtn = recorderBtn;

// Mutable state shared by the handlers below.
let polling = null;        // interval id created by pollStatus(), or null
let isRecording = false;   // last recording state pushed into the UI
|
| 517 |
+
|
| 518 |
+
/**
 * Switch the recorder button, its icons and the status label between the
 * "recording" and "idle" looks, and remember the state in isRecording.
 *
 * @param {*} recording Truthy while a recording is in progress.
 */
function setRecordingUI(recording) {
  isRecording = recording;

  // toggle() needs a real boolean: an undefined "force" would flip the class.
  const active = Boolean(recording);

  recorderBtn.classList.toggle('recording', active);
  recorderStatus.classList.toggle('recording', active);

  // Exactly one of the two icons is visible at any time.
  micIcon.style.display = active ? 'none' : '';
  stopIcon.style.display = active ? '' : 'none';

  recorderStatus.textContent = active ? 'Recording...' : 'Ready to record';
}
|
| 534 |
+
|
| 535 |
+
// One control for both actions: a click starts a recording when idle and
// stops the running one otherwise.
recorderBtn.onclick = async function () {
  const action = isRecording ? stopRecording : startRecording;
  await action();
};
|
| 542 |
+
|
| 543 |
+
/**
 * Ask the backend to start a recording. On success the UI switches to the
 * recording state and status polling begins; on failure the reason is
 * written to the #status element. Errors are reported, never thrown.
 */
async function startRecording() {
  const statusEl = document.getElementById('status');

  // Capture settings sent with the request; only the transcription toggle
  // is read from the page, the rest are fixed values.
  const payload = {
    mic: 1,
    sys: 16,
    chunk_secs: 5,
    model: "medium",
    no_transcribe: document.getElementById('no_transcribe').checked
  };

  // Write a message into the status line using the given CSS color.
  const showStatus = (message, color) => {
    statusEl.textContent = message;
    statusEl.style.color = color;
  };

  // Immediate feedback, in the normal (non-error) color.
  showStatus('Starting...', 'var(--accent2)');

  try {
    // Legacy endpoint kept for reference: POST '/transcribe_live/start'
    // with the same headers and JSON body.
    const resp = await fetch('/api/start-recording', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload)
    });

    if (resp.ok) {
      // Success: flip the UI and start polling.
      showStatus('Recording...', 'var(--danger)');
      setRecordingUI(true);
      pollStatus();
      return;
    }

    // Prefer the backend's JSON { error: "..." } message; fall back to a
    // generic one when the body is missing or not JSON.
    const detail = await resp.json().then(
      (json) => json && json.error,
      () => null
    );
    showStatus(detail || `Failed to start recording (${resp.status})`, 'var(--danger)');
    setRecordingUI(false);
  } catch (err) {
    // Network / unexpected error.
    showStatus('Network error: could not start recording', 'var(--danger)');
    setRecordingUI(false);
    console.error("startRecording error:", err);
  }
}
|
| 597 |
+
|
| 598 |
+
/**
 * Ask the backend to stop the current recording.
 *
 * "Stopping..." is shown before the request is sent. If the request fails
 * (network error or non-2xx response) the error is shown in #status and
 * the UI is left in the recording state. On success the UI returns to
 * idle, polling is cancelled and the file list is refreshed after a short
 * delay. Errors are reported, never thrown.
 */
async function stopRecording() {
  const statusEl = document.getElementById('status');
  statusEl.textContent = 'Stopping...';

  try {
    const resp = await fetch('/api/stop-recording', { method: 'POST' });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
  } catch (err) {
    statusEl.textContent = 'Could not stop recording';
    statusEl.style.color = 'var(--danger)';
    console.error("stopRecording error:", err);
    return;
  }

  setRecordingUI(false);
  if (polling) {
    clearInterval(polling);
    polling = null;
  }

  // Refresh the file list two seconds after the stop request succeeded.
  setTimeout(() => {
    loadFiles().catch((err) => console.error("loadFiles error:", err));
  }, 2000);
}
|
| 605 |
+
|
| 606 |
+
// --- Poll status ---
|
| 607 |
+
/**
 * Poll /api/recording-status once per second while a recording runs.
 *
 * Each tick syncs the recorder UI with the backend state and re-renders
 * the last ten live transcription segments into #live. When the backend
 * reports that recording has ended, polling stops and the file list is
 * reloaded. A failed request skips that tick and is retried on the next.
 */
function pollStatus() {
  // Never run two pollers at once: a second call replaces the first.
  if (polling) clearInterval(polling);

  const timer = setInterval(async () => {
    let data;
    try {
      const res = await fetch('/api/recording-status');
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      data = await res.json();
    } catch (err) {
      // Transient failure: keep the current UI and try again next tick.
      console.error("pollStatus error:", err);
      return;
    }

    const recording = Boolean(data.recording);
    setRecordingUI(recording);

    // --- Show live transcription ---
    const liveDiv = document.getElementById('live');
    const segments = Array.isArray(data.live_segments) ? data.live_segments : [];
    liveDiv.textContent = '';

    if (segments.length) {
      segments.slice(-10).forEach((seg) => {
        // Build nodes instead of using innerHTML so speaker names and
        // transcribed text are always rendered as plain text.
        const row = document.createElement('div');
        row.className = 'chunk';

        const speaker = document.createElement('b');
        speaker.textContent = `${seg.speaker || 'Speaker'}:`;

        const text = seg.text == null ? '' : seg.text;
        row.appendChild(speaker);
        row.appendChild(document.createTextNode(
          ` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${text}`
        ));
        liveDiv.appendChild(row);
      });
      // Keep the newest segment in view once layout has been updated.
      requestAnimationFrame(() => {
        liveDiv.scrollTop = liveDiv.scrollHeight;
      });
    } else {
      liveDiv.textContent = 'No Transcription Yet...';
    }

    if (!recording) {
      clearInterval(timer);
      if (polling === timer) polling = null;
      loadFiles().catch((err) => console.error("loadFiles error:", err));
    }
  }, 1000);

  polling = timer;
}
|
| 637 |
+
// Helper to format time
|
| 638 |
+
/**
 * Format a duration in seconds as "m:ss" (minutes are not capped at 59).
 *
 * @param {number|string|null|undefined} s Seconds; numeric strings are accepted.
 * @returns {string} "0:00" for missing, non-numeric, non-finite or
 *   negative input, otherwise whole minutes and zero-padded whole seconds.
 */
function formatTime(s) {
  const total = Number(s);
  // Guard against null/undefined, NaN, Infinity and negative offsets,
  // which would otherwise render as "NaN:NaN" or "-1:-5".
  if (s == null || !Number.isFinite(total) || total < 0) return "0:00";

  const mm = Math.floor(total / 60);
  const ss = String(Math.floor(total % 60)).padStart(2, "0");
  return `${mm}:${ss}`;
}
|
| 644 |
+
|
| 645 |
+
// --- Load final files and display audio player and transcript ---
|
| 646 |
+
/**
 * Fetch the list of finished recording files from /api/final-files and
 * render it: an audio player (with a WaveSurfer waveform when the library
 * is loaded) for the last "recorded_audio_*.wav", the text of the last
 * "transcript_*.txt", and a download link for every other file.
 *
 * Does nothing when the page has no #files, #audio-player-container or
 * #transcript-container element. Fetch and parse failures are shown as
 * "Error loading files." instead of being thrown.
 */
async function loadFiles() {
  const filesDiv = document.getElementById('files');
  const audioPlayerDiv = document.getElementById('audio-player-container');
  const transcriptDiv = document.getElementById('transcript-container');

  // The "Recording Files" section can be absent from the page (it is
  // commented out in the markup). Without its containers there is nothing
  // to render into, so return instead of throwing on null.
  if (!filesDiv || !audioPlayerDiv || !transcriptDiv) return;

  filesDiv.textContent = '';
  audioPlayerDiv.textContent = '';
  transcriptDiv.textContent = '';

  try {
    const res = await fetch('/api/final-files');
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    const files = data && Array.isArray(data.files) ? data.files : [];
    if (!files.length) {
      filesDiv.textContent = 'No files yet.';
      return;
    }

    // Pick recorded_audio_{RUN_TIMESTAMP}.wav and transcript_{RUN_TIMESTAMP}.txt;
    // when several match, the last one in the server's list wins.
    let audioFile = null, transcriptFile = null;
    files.forEach((f) => {
      if (/^recorded_audio_.*\.wav$/.test(f.name)) audioFile = f;
      if (/^transcript_.*\.txt$/.test(f.name)) transcriptFile = f;
    });

    // Audio player, built from DOM nodes so file names and URLs coming
    // from the server are never parsed as HTML.
    if (audioFile) {
      const audioSrc = audioFile.url || audioFile.path;

      const title = document.createElement('div');
      title.style.marginBottom = '12px';
      const titleText = document.createElement('b');
      titleText.textContent = audioFile.name;
      title.appendChild(titleText);

      const waveform = document.createElement('div');
      waveform.id = 'waveform';
      waveform.style.cssText = 'width:100%;height:80px;background:#2d254a;border-radius:6px;';

      const audioElem = document.createElement('audio');
      audioElem.id = 'audio-player';
      audioElem.controls = true;
      audioElem.style.cssText = 'width:100%;margin-top:8px;';
      const source = document.createElement('source');
      source.src = audioSrc;
      source.type = 'audio/wav';
      audioElem.appendChild(source);
      audioElem.appendChild(
        document.createTextNode('Your browser does not support the audio element.')
      );

      audioPlayerDiv.appendChild(title);
      audioPlayerDiv.appendChild(waveform);
      audioPlayerDiv.appendChild(audioElem);

      // Use wavesurfer.js for the waveform when it has been loaded.
      if (window.WaveSurfer) {
        const wavesurfer = WaveSurfer.create({
          container: '#waveform',
          waveColor: '#a259ec',
          progressColor: '#2563eb',
          height: 80,
          barWidth: 2,
          responsive: true,
          cursorColor: '#dc2626'
        });
        wavesurfer.load(audioSrc);

        // Keep the waveform and the <audio> element in step.
        audioElem.addEventListener('play', () => wavesurfer.play());
        audioElem.addEventListener('pause', () => wavesurfer.pause());
        wavesurfer.on('seek', (progress) => {
          // duration is NaN until metadata has loaded; assigning a
          // non-finite currentTime throws.
          if (Number.isFinite(audioElem.duration)) {
            audioElem.currentTime = progress * audioElem.duration;
          }
        });
        audioElem.addEventListener('timeupdate', () => {
          if (!audioElem.paused && Number.isFinite(audioElem.duration) && audioElem.duration > 0) {
            wavesurfer.seekTo(audioElem.currentTime / audioElem.duration);
          }
        });
      } else {
        const hint = document.createElement('div');
        hint.style.cssText = 'color:#64748b;text-align:center;padding-top:28px;';
        hint.textContent = '(Waveform preview requires wavesurfer.js)';
        waveform.appendChild(hint);
      }
    }

    // Transcript: show the file name, then fill in the text once fetched.
    if (transcriptFile) {
      const heading = document.createElement('b');
      heading.textContent = transcriptFile.name;
      const content = document.createElement('pre');
      content.id = 'transcript-content';
      transcriptDiv.appendChild(heading);
      transcriptDiv.appendChild(content);

      fetch(transcriptFile.url || transcriptFile.path)
        .then((r) => {
          if (!r.ok) throw new Error(`HTTP ${r.status}`);
          return r.text();
        })
        .then((txt) => {
          // Write to the node created above, not a fresh id lookup, so a
          // later loadFiles() run cannot receive this stale text.
          content.textContent = txt;
        })
        .catch((err) => {
          content.textContent = 'Error loading transcript.';
          console.error("transcript load error:", err);
        });
    }

    // List every other file with a download link.
    files.forEach((f) => {
      if (
        (audioFile && f.name === audioFile.name) ||
        (transcriptFile && f.name === transcriptFile.name)
      ) return;

      const row = document.createElement('div');
      row.className = 'file';

      const label = document.createElement('span');
      label.textContent = f.name;

      const link = document.createElement('a');
      link.href = f.url || f.path;
      link.target = '_blank';
      link.rel = 'noopener';
      link.textContent = 'Download';

      row.appendChild(label);
      row.appendChild(document.createTextNode(' '));
      row.appendChild(link);
      filesDiv.appendChild(row);
    });
  } catch (e) {
    filesDiv.textContent = 'Error loading files.';
    console.error("loadFiles error:", e);
  }
}
|
| 739 |
+
|
| 740 |
+
// --- On load ---
// Render the file list once WaveSurfer is available, or known to be
// unavailable, so that the very first render can already draw the
// waveform. Previously loadFiles() ran before the script had loaded and
// the onload hook did nothing.
(function bootstrapFiles() {
  const run = () => {
    loadFiles().catch((err) => console.error("loadFiles error:", err));
  };

  if (window.WaveSurfer) {
    run();
    return;
  }

  // Load wavesurfer.js dynamically; render either way when loading settles.
  // NOTE(review): this URL is unpinned and resolves to the latest release,
  // while loadFiles() uses the `responsive` option and the 'seek' event.
  // Confirm those exist in the version served, or pin a major version.
  const script = document.createElement('script');
  script.src = "https://unpkg.com/wavesurfer.js";
  script.onload = run;
  script.onerror = run;
  document.head.appendChild(script);
})();
|
| 750 |
+
</script>
|
| 751 |
+
</body>
|
| 752 |
+
|
| 753 |
+
</html>
|
templates/index2_upload.html
ADDED
|
@@ -0,0 +1,736 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8" />
|
| 6 |
+
<title>Audio Transcription Studio</title>
|
| 7 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 8 |
+
<link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
|
| 9 |
+
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
|
| 10 |
+
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--bg: #18122b;
|
| 13 |
+
--bg-card: #231942;
|
| 14 |
+
--bg-card2: #251e3e;
|
| 15 |
+
--accent: #a259ec;
|
| 16 |
+
--accent2: #2563eb;
|
| 17 |
+
--text: #fff;
|
| 18 |
+
--text-muted: #bcbcbc;
|
| 19 |
+
--border: #312e4a;
|
| 20 |
+
--success: #22c55e;
|
| 21 |
+
--danger: #dc2626;
|
| 22 |
+
--cyan: #00fff7;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
html,
|
| 26 |
+
body {
|
| 27 |
+
height: 100%;
|
| 28 |
+
margin: 0;
|
| 29 |
+
padding: 0;
|
| 30 |
+
font-family: 'Inter', Arial, sans-serif;
|
| 31 |
+
background: var(--bg);
|
| 32 |
+
color: var(--text);
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.layout {
|
| 36 |
+
display: flex;
|
| 37 |
+
min-height: 100vh;
|
| 38 |
+
gap: 32px;
|
| 39 |
+
padding: 32px;
|
| 40 |
+
box-sizing: border-box;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.main-panel {
|
| 44 |
+
flex: 2;
|
| 45 |
+
display: flex;
|
| 46 |
+
flex-direction: column;
|
| 47 |
+
gap: 24px;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
.card {
|
| 51 |
+
background: var(--bg-card);
|
| 52 |
+
border-radius: 18px;
|
| 53 |
+
box-shadow: 0 2px 16px #0003;
|
| 54 |
+
padding: 32px 32px 24px 32px;
|
| 55 |
+
margin-bottom: 0;
|
| 56 |
+
border: 1.5px solid var(--border);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.card h2,
|
| 60 |
+
.card h3 {
|
| 61 |
+
margin-top: 0;
|
| 62 |
+
color: var(--accent);
|
| 63 |
+
font-size: 1.5em;
|
| 64 |
+
font-weight: 700;
|
| 65 |
+
margin-bottom: 18px;
|
| 66 |
+
letter-spacing: 1px;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.sidebar {
|
| 70 |
+
flex: 1;
|
| 71 |
+
min-width: 320px;
|
| 72 |
+
background: var(--bg-card2);
|
| 73 |
+
border-radius: 18px;
|
| 74 |
+
box-shadow: 0 2px 16px #0003;
|
| 75 |
+
padding: 32px 28px 24px 28px;
|
| 76 |
+
display: flex;
|
| 77 |
+
flex-direction: column;
|
| 78 |
+
gap: 32px;
|
| 79 |
+
border: 1.5px solid var(--border);
|
| 80 |
+
height: fit-content;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.sidebar h3 {
|
| 84 |
+
color: var(--accent2);
|
| 85 |
+
font-size: 1.2em;
|
| 86 |
+
font-weight: 700;
|
| 87 |
+
margin-bottom: 18px;
|
| 88 |
+
letter-spacing: 1px;
|
| 89 |
+
display: flex;
|
| 90 |
+
align-items: center;
|
| 91 |
+
gap: 8px;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.sidebar label {
|
| 95 |
+
font-size: 1em;
|
| 96 |
+
color: var(--text-muted);
|
| 97 |
+
margin-top: 18px;
|
| 98 |
+
font-weight: 600;
|
| 99 |
+
display: flex;
|
| 100 |
+
align-items: center;
|
| 101 |
+
gap: 8px;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
.sidebar select,
|
| 105 |
+
.sidebar input[type="number"] {
|
| 106 |
+
width: 100%;
|
| 107 |
+
margin-top: 6px;
|
| 108 |
+
padding: 10px;
|
| 109 |
+
border-radius: 8px;
|
| 110 |
+
border: 1px solid var(--border);
|
| 111 |
+
background: #201c3a;
|
| 112 |
+
color: var(--text);
|
| 113 |
+
font-size: 1em;
|
| 114 |
+
margin-bottom: 10px;
|
| 115 |
+
outline: none;
|
| 116 |
+
transition: border 0.2s;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.sidebar select:focus,
|
| 120 |
+
.sidebar input[type="number"]:focus {
|
| 121 |
+
border: 1.5px solid var(--accent2);
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.sidebar button {
|
| 125 |
+
width: 100%;
|
| 126 |
+
padding: 14px 0;
|
| 127 |
+
margin-top: 18px;
|
| 128 |
+
border: none;
|
| 129 |
+
border-radius: 8px;
|
| 130 |
+
background: var(--accent);
|
| 131 |
+
color: #fff;
|
| 132 |
+
font-size: 1.1em;
|
| 133 |
+
font-weight: 600;
|
| 134 |
+
cursor: pointer;
|
| 135 |
+
transition: background 0.2s;
|
| 136 |
+
box-shadow: 0 2px 8px #0002;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
.sidebar button:disabled {
|
| 140 |
+
background: #a5b4fc;
|
| 141 |
+
cursor: not-allowed;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.sidebar .stop-btn {
|
| 145 |
+
background: var(--danger);
|
| 146 |
+
margin-top: 8px;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
.toggle-row {
|
| 150 |
+
display: flex;
|
| 151 |
+
align-items: center;
|
| 152 |
+
gap: 10px;
|
| 153 |
+
margin-top: 10px;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
.toggle-label {
|
| 157 |
+
flex: 1;
|
| 158 |
+
color: var(--text-muted);
|
| 159 |
+
font-size: 1em;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
.toggle-switch {
|
| 163 |
+
width: 38px;
|
| 164 |
+
height: 22px;
|
| 165 |
+
background: #333;
|
| 166 |
+
border-radius: 12px;
|
| 167 |
+
position: relative;
|
| 168 |
+
cursor: pointer;
|
| 169 |
+
transition: background 0.2s;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.toggle-switch input {
|
| 173 |
+
display: none;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.toggle-slider {
|
| 177 |
+
position: absolute;
|
| 178 |
+
top: 2px;
|
| 179 |
+
left: 2px;
|
| 180 |
+
width: 18px;
|
| 181 |
+
height: 18px;
|
| 182 |
+
background: var(--accent2);
|
| 183 |
+
border-radius: 50%;
|
| 184 |
+
transition: left 0.2s;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
.toggle-switch input:checked+.toggle-slider {
|
| 188 |
+
left: 18px;
|
| 189 |
+
background: var(--danger);
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
.status {
|
| 193 |
+
margin: 18px 0 0 0;
|
| 194 |
+
font-weight: bold;
|
| 195 |
+
color: var(--success);
|
| 196 |
+
font-size: 1.1em;
|
| 197 |
+
text-align: center;
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
.recorder-center {
|
| 201 |
+
display: flex;
|
| 202 |
+
flex-direction: column;
|
| 203 |
+
align-items: center;
|
| 204 |
+
gap: 18px;
|
| 205 |
+
margin-bottom: 18px;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.recorder-btn {
|
| 209 |
+
width: 90px;
|
| 210 |
+
height: 90px;
|
| 211 |
+
border-radius: 50%;
|
| 212 |
+
background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
|
| 213 |
+
display: flex;
|
| 214 |
+
align-items: center;
|
| 215 |
+
justify-content: center;
|
| 216 |
+
box-shadow: 0 0 32px #a259ec55;
|
| 217 |
+
cursor: pointer;
|
| 218 |
+
transition: box-shadow 0.2s, background 0.2s;
|
| 219 |
+
position: relative;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.recorder-btn.recording {
|
| 223 |
+
background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
|
| 224 |
+
box-shadow: 0 0 32px #dc262655;
|
| 225 |
+
animation: pulse 1.2s infinite;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
@keyframes pulse {
|
| 229 |
+
0% {
|
| 230 |
+
box-shadow: 0 0 32px #dc262655;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
50% {
|
| 234 |
+
box-shadow: 0 0 48px #dc2626aa;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
100% {
|
| 238 |
+
box-shadow: 0 0 32px #dc262655;
|
| 239 |
+
}
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
.recorder-btn svg {
|
| 243 |
+
width: 38px;
|
| 244 |
+
height: 38px;
|
| 245 |
+
color: #fff;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.recorder-status {
|
| 249 |
+
color: var(--success);
|
| 250 |
+
font-size: 1.1em;
|
| 251 |
+
font-weight: 600;
|
| 252 |
+
margin-top: 8px;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
.recorder-status.recording {
|
| 256 |
+
color: var(--danger);
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.live {
|
| 260 |
+
margin-top: 0;
|
| 261 |
+
background: #201c3a;
|
| 262 |
+
border-radius: 12px;
|
| 263 |
+
padding: 18px 18px 10px 18px;
|
| 264 |
+
min-height: 90px;
|
| 265 |
+
border: 1px solid var(--border);
|
| 266 |
+
overflow: hidden;
|
| 267 |
+
display: flex;
|
| 268 |
+
flex-direction: column;
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
/* inner container which actually scrolls */
|
| 272 |
+
#live {
|
| 273 |
+
flex: 1 1 auto;
|
| 274 |
+
overflow-y: auto;
|
| 275 |
+
padding-right: 6px;
|
| 276 |
+
-webkit-overflow-scrolling: touch;
|
| 277 |
+
scroll-behavior: smooth;
|
| 278 |
+
color: var(--text-muted);
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.live h4 {
|
| 282 |
+
margin: 0 0 10px 0;
|
| 283 |
+
color: var(--cyan);
|
| 284 |
+
font-size: 1.08em;
|
| 285 |
+
font-weight: 600;
|
| 286 |
+
display: flex;
|
| 287 |
+
align-items: center;
|
| 288 |
+
gap: 8px;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.chunk {
|
| 292 |
+
background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
|
| 293 |
+
margin-bottom: 8px;
|
| 294 |
+
padding: 10px 12px;
|
| 295 |
+
border-radius: 8px;
|
| 296 |
+
font-size: 0.98em;
|
| 297 |
+
color: var(--text);
|
| 298 |
+
box-shadow: 0 1px 2px #0002;
|
| 299 |
+
border: 1px solid rgba(255, 255, 255, 0.02);
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
.chunk b {
|
| 303 |
+
color: var(--cyan);
|
| 304 |
+
margin-right: 6px;
|
| 305 |
+
font-weight: 700;
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
/* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
|
| 309 |
+
#live::-webkit-scrollbar {
|
| 310 |
+
width: 10px;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
#live::-webkit-scrollbar-track {
|
| 314 |
+
background: rgba(255, 255, 255, 0.02);
|
| 315 |
+
border-radius: 10px;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
#live::-webkit-scrollbar-thumb {
|
| 319 |
+
background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
|
| 320 |
+
border-radius: 10px;
|
| 321 |
+
border: 2px solid rgba(0, 0, 0, 0.15);
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
#live::-webkit-scrollbar-thumb:hover {
|
| 325 |
+
filter: brightness(0.95);
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
#live {
|
| 329 |
+
scrollbar-width: thin;
|
| 330 |
+
scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
@media (max-width: 700px) {
|
| 334 |
+
.live {
|
| 335 |
+
max-height: 200px;
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
.files h4 {
|
| 340 |
+
color: var(--accent2);
|
| 341 |
+
font-size: 1.08em;
|
| 342 |
+
margin: 0 0 10px 0;
|
| 343 |
+
font-weight: 600;
|
| 344 |
+
display: flex;
|
| 345 |
+
align-items: center;
|
| 346 |
+
gap: 8px;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.file {
|
| 350 |
+
background: #2d254a;
|
| 351 |
+
margin-bottom: 8px;
|
| 352 |
+
padding: 8px 12px;
|
| 353 |
+
border-radius: 5px;
|
| 354 |
+
font-size: 1em;
|
| 355 |
+
color: #e0e7ef;
|
| 356 |
+
display: flex;
|
| 357 |
+
align-items: center;
|
| 358 |
+
justify-content: space-between;
|
| 359 |
+
box-shadow: 0 1px 2px #0001;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.file a {
|
| 363 |
+
color: var(--accent2);
|
| 364 |
+
text-decoration: none;
|
| 365 |
+
font-weight: 500;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
.file a:hover {
|
| 369 |
+
text-decoration: underline;
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
#audio-player-container {
|
| 373 |
+
margin-bottom: 18px;
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
#waveform {
|
| 377 |
+
width: 100%;
|
| 378 |
+
height: 80px;
|
| 379 |
+
background: #2d254a;
|
| 380 |
+
border-radius: 6px;
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
#transcript-container {
|
| 384 |
+
background: #2d254a;
|
| 385 |
+
padding: 14px;
|
| 386 |
+
border-radius: 6px;
|
| 387 |
+
margin-top: 24px;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
#transcript-content {
|
| 391 |
+
margin-top: 10px;
|
| 392 |
+
white-space: pre-wrap;
|
| 393 |
+
font-size: 1em;
|
| 394 |
+
color: #e0e7ef;
|
| 395 |
+
max-height: 300px;
|
| 396 |
+
overflow: auto;
|
| 397 |
+
background: #201c3a;
|
| 398 |
+
padding: 10px;
|
| 399 |
+
border-radius: 4px;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
@media (max-width: 1100px) {
|
| 403 |
+
.layout {
|
| 404 |
+
flex-direction: column;
|
| 405 |
+
gap: 0;
|
| 406 |
+
padding: 12px;
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
.sidebar {
|
| 410 |
+
min-width: unset;
|
| 411 |
+
width: 100%;
|
| 412 |
+
margin-bottom: 18px;
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
.main-panel {
|
| 416 |
+
padding: 0;
|
| 417 |
+
}
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
@media (max-width: 700px) {
|
| 421 |
+
|
| 422 |
+
.card,
|
| 423 |
+
.sidebar {
|
| 424 |
+
padding: 16px 8px 12px 8px;
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
.main-panel {
|
| 428 |
+
gap: 12px;
|
| 429 |
+
}
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
/* UPLOAD area styles */
|
| 433 |
+
.upload {
|
| 434 |
+
display: flex;
|
| 435 |
+
flex-direction: column;
|
| 436 |
+
align-items: center;
|
| 437 |
+
gap: 10px;
|
| 438 |
+
padding: 18px 22px;
|
| 439 |
+
border-radius: 12px;
|
| 440 |
+
background: rgba(255, 255, 255, 0.02);
|
| 441 |
+
border: 1px solid rgba(255, 255, 255, 0.03);
|
| 442 |
+
cursor: default;
|
| 443 |
+
width: 100%;
|
| 444 |
+
max-width: 420px;
|
| 445 |
+
margin: 0 auto;
|
| 446 |
+
}
|
| 447 |
+
|
| 448 |
+
.upload-btn {
|
| 449 |
+
display: flex;
|
| 450 |
+
flex-direction: column;
|
| 451 |
+
align-items: center;
|
| 452 |
+
gap: 10px;
|
| 453 |
+
cursor: pointer;
|
| 454 |
+
outline: none;
|
| 455 |
+
user-select: none;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.upload-btn:focus-visible {
|
| 459 |
+
box-shadow: 0 0 0 8px rgba(37, 99, 235, 0.08);
|
| 460 |
+
border-radius: 12px;
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
.upload-img {
|
| 464 |
+
width: 120px;
|
| 465 |
+
height: 96px;
|
| 466 |
+
object-fit: contain;
|
| 467 |
+
display: block;
|
| 468 |
+
user-select: none;
|
| 469 |
+
pointer-events: none;
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
.upload-text {
|
| 473 |
+
color: #bcbcbc;
|
| 474 |
+
font-weight: 700;
|
| 475 |
+
font-size: 14px;
|
| 476 |
+
text-align: center;
|
| 477 |
+
max-width: 220px;
|
| 478 |
+
word-break: break-word;
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
/* preview area inside upload container */
|
| 482 |
+
.upload-preview {
|
| 483 |
+
width: 100%;
|
| 484 |
+
display: flex;
|
| 485 |
+
flex-direction: column;
|
| 486 |
+
gap: 10px;
|
| 487 |
+
align-items: center;
|
| 488 |
+
margin-top: 6px;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.upload-preview .filename {
|
| 492 |
+
color: var(--text);
|
| 493 |
+
font-weight: 600;
|
| 494 |
+
font-size: 0.95em;
|
| 495 |
+
text-align: center;
|
| 496 |
+
white-space: nowrap;
|
| 497 |
+
overflow: hidden;
|
| 498 |
+
text-overflow: ellipsis;
|
| 499 |
+
max-width: 100%;
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
.upload-preview .controls {
|
| 503 |
+
display: flex;
|
| 504 |
+
gap: 8px;
|
| 505 |
+
align-items: center;
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
.btn-small {
|
| 509 |
+
padding: 6px 10px;
|
| 510 |
+
border-radius: 6px;
|
| 511 |
+
background: #2d254a;
|
| 512 |
+
color: var(--text);
|
| 513 |
+
border: 1px solid rgba(255, 255, 255, 0.03);
|
| 514 |
+
cursor: pointer;
|
| 515 |
+
font-weight: 600;
|
| 516 |
+
}
|
| 517 |
+
</style>
|
| 518 |
+
</head>
|
| 519 |
+
|
| 520 |
+
<body>
|
| 521 |
+
<div class="layout">
|
| 522 |
+
<main class="main-panel">
|
| 523 |
+
<section class="card">
|
| 524 |
+
<h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio
|
| 525 |
+
</h2>
|
| 526 |
+
<div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
|
| 527 |
+
Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
|
| 528 |
+
</div>
|
| 529 |
+
<div class="upload">
|
| 530 |
+
|
| 531 |
+
<label for="uploadFile" class="upload-btn" tabindex="0" role="button"
|
| 532 |
+
aria-label="Upload audio file">
|
| 533 |
+
<img class="upload-img" src="/static/icon_upload.png" alt="Upload icon" />
|
| 534 |
+
<small class="upload-text">Upload .mp3, .wav file</small>
|
| 535 |
+
</label>
|
| 536 |
+
|
| 537 |
+
|
| 538 |
+
<input id="uploadFile" type="file" accept=".mp3,.wav,audio/*" style="display:none" />
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
<div id="uploadPreview" class="upload-preview" aria-live="polite"></div>
|
| 542 |
+
</div>
|
| 543 |
+
</section>
|
| 544 |
+
|
| 545 |
+
<section class="card">
|
| 546 |
+
<h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
|
| 547 |
+
<div class="live">
|
| 548 |
+
<div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription
|
| 549 |
+
</div>
|
| 550 |
+
</div>
|
| 551 |
+
</section>
|
| 552 |
+
</main>
|
| 553 |
+
|
| 554 |
+
<aside class="sidebar">
|
| 555 |
+
<h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
|
| 556 |
+
<label for="mic">Microphone Device</label>
|
| 557 |
+
<select id="mic" disabled>
|
| 558 |
+
<option value="1" selected>Default Microphone (#1)</option>
|
| 559 |
+
</select>
|
| 560 |
+
<label for="sys">System Audio (Optional)</label>
|
| 561 |
+
<select id="sys" disabled>
|
| 562 |
+
<option value="16" selected>System Loopback (#16)</option>
|
| 563 |
+
</select>
|
| 564 |
+
<label for="chunk_secs">Chunk Length (seconds)</label>
|
| 565 |
+
<input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
|
| 566 |
+
<label for="model">Transcription Model</label>
|
| 567 |
+
<select id="model" disabled>
|
| 568 |
+
<option value="small">Small (Fast)</option>
|
| 569 |
+
<option value="medium" selected>Medium (Balanced)</option>
|
| 570 |
+
<option value="large">Large (Accurate)</option>
|
| 571 |
+
</select>
|
| 572 |
+
<!-- <div class="toggle-row">
|
| 573 |
+
<span class="toggle-label">Disable Transcription</span>
|
| 574 |
+
<label class="toggle-switch">
|
| 575 |
+
<input type="checkbox" id="no_transcribe">
|
| 576 |
+
<span class="toggle-slider"></span>
|
| 577 |
+
</label>
|
| 578 |
+
</div> -->
|
| 579 |
+
<div class="status" id="status"></div>
|
| 580 |
+
</aside>
|
| 581 |
+
</div>
|
| 582 |
+
<script>
|
| 583 |
+
(function () {
|
| 584 |
+
const uploadEl = document.getElementById('uploadFile');
|
| 585 |
+
const preview = document.getElementById('uploadPreview');
|
| 586 |
+
const live = document.getElementById('live');
|
| 587 |
+
const statusEl = document.getElementById('status');
|
| 588 |
+
let audioEl = null;
|
| 589 |
+
let es = null; // EventSource
|
| 590 |
+
let playing = false;
|
| 591 |
+
let currentFile = null;
|
| 592 |
+
|
| 593 |
+
// Upload one audio file to the server as multipart form data (field "file").
//
// Always resolves to an object. On success this is the server's JSON reply;
// on any failure (network error, non-JSON reply) it is
// { success: false, error: <message> } so the caller can report it instead
// of hitting an unhandled promise rejection.
async function uploadFile(file) {
  const fd = new FormData();
  fd.append('file', file);

  let resp;
  try {
    resp = await fetch('/api/upload', { method: 'POST', body: fd });
  } catch (err) {
    // fetch only rejects on network-level failures
    return { success: false, error: err && err.message ? err.message : 'network error' };
  }

  let payload = null;
  try {
    payload = await resp.json();
  } catch (err) {
    // body was not JSON (e.g. an HTML error page); handled below
    payload = null;
  }
  if (payload) return payload;

  return {
    success: false,
    error: resp.ok ? 'invalid server response' : 'HTTP ' + resp.status
  };
}
|
| 599 |
+
|
| 600 |
+
// Replace the contents of the upload preview with a filename label and an
// <audio> player for the uploaded file, and hook play/pause to the
// transcription handlers. The created element is remembered in `audioEl`.
function createAudioPlayer(url, filename) {
  // drop whatever preview was shown before
  preview.innerHTML = '';

  // centered column wrapper
  const wrapper = document.createElement('div');
  Object.assign(wrapper.style, {
    width: '100%',
    display: 'flex',
    flexDirection: 'column',
    alignItems: 'center'
  });

  const player = document.createElement('audio');
  player.controls = true;
  player.src = url;
  player.id = 'uploadedAudio';
  player.style.width = '100%';
  // the filename is read back from the dataset when playback starts
  player.dataset.filename = filename;

  const label = document.createElement('div');
  label.className = 'filename';
  label.textContent = filename;

  wrapper.append(label, player);
  preview.appendChild(wrapper);

  // listeners
  const handlers = [['play', onPlay], ['pause', onPause]];
  for (const [eventName, handler] of handlers) {
    player.addEventListener(eventName, handler);
  }
  audioEl = player;
}
|
| 628 |
+
|
| 629 |
+
// 'play' handler for the uploaded audio element: asks the server to start
// transcribing the current file, then opens an SSE stream on /events and
// appends every received line to the live transcript.
async function onPlay() {
  if (!audioEl || playing) return;
  playing = true;
  currentFile = audioEl.dataset.filename;

  // update UI
  statusEl.textContent = "▶️ Transcribing...";
  statusEl.style.color = "var(--success)";

  try {
    const resp = await fetch('/api/start-transcribe-file', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ filename: currentFile })
    });
    if (!resp.ok) {
      // fetch does not reject on HTTP errors, so surface them explicitly
      console.error('start failed', resp.status);
    }
  } catch (e) {
    console.error('start failed', e);
  }

  // The user may have paused while the request above was in flight. In that
  // case onPause has already run and found no stream to close, so opening
  // one now would leave it dangling while the UI shows "Stopped".
  if (!playing) return;

  // open SSE for transcription
  if (es) es.close();
  es = new EventSource('/events');
  es.onmessage = function (ev) {
    const line = ev.data;
    if (!line) return;
    appendLine(line);
  };
  es.onerror = (e) => {
    console.warn('SSE error', e);
  };
}
|
| 660 |
+
|
| 661 |
+
// 'pause' handler for the uploaded audio element: marks playback as stopped,
// tells the server to stop, and closes the SSE stream if one is open.
async function onPause() {
  const active = Boolean(audioEl) && playing;
  if (!active) return;
  playing = false;

  statusEl.textContent = "⏸️ Stopped";
  statusEl.style.color = "var(--danger)";

  // best effort: a failed stop request is logged, not rethrown
  await fetch('/stop', { method: 'POST' }).catch((err) => {
    console.error('stop failed', err);
  });

  if (!es) return;
  es.close();
  es = null;
}
|
| 678 |
+
|
| 679 |
+
// function appendLine(s) {
|
| 680 |
+
// const chunk = document.createElement('div');
|
| 681 |
+
// chunk.className = 'chunk';
|
| 682 |
+
// chunk.textContent = s;
|
| 683 |
+
// live.appendChild(chunk);
|
| 684 |
+
// live.scrollTop = live.scrollHeight;
|
| 685 |
+
// }
|
| 686 |
+
// Append one transcript line to the live view and keep it scrolled to the
// bottom.
//
// Lines shaped like "[file.wav] 0.00-3.00 Speaker A: Hello world" are shown
// as "<b>A</b> [0.00-3.00s]: Hello world"; anything else is shown verbatim.
// The line comes from the server stream, so it is inserted as text nodes
// only and never parsed as HTML.
function appendLine(s) {
  const chunk = document.createElement('div');
  chunk.className = 'chunk';

  // Try to parse format: [file.wav] 0.00-3.00 Speaker A: Hello world
  const m = s.match(/^\[(.*?)\]\s+([\d.]+)-([\d.]+)\s+Speaker\s+(\S+):\s+(.*)$/);
  if (m) {
    // m[1] is the file name, which is not displayed
    const [, , start, end, speaker, text] = m;
    const who = document.createElement('b');
    who.textContent = speaker;
    chunk.appendChild(who);
    chunk.appendChild(document.createTextNode(` [${start}-${end}s]: ${text}`));
  } else {
    chunk.textContent = s;
  }

  live.appendChild(chunk);
  live.scrollTop = live.scrollHeight;
}
|
| 702 |
+
|
| 703 |
+
// Poll /status every few seconds (optional, keeps sidebar updated)
|
| 704 |
+
// Fetch /status and mirror it in the sidebar status line, then schedule the
// next poll 3 seconds later. Failures are ignored on purpose so that polling
// keeps going while the server is unreachable.
async function pollStatus() {
  try {
    const resp = await fetch('/status');
    const { running } = await resp.json();

    let label = null;
    let color = null;
    if (running) {
      label = "▶️ Running";
      color = "var(--success)";
    } else if (!playing) {
      // only fall back to Idle when the local player is not playing either
      label = "⏸️ Idle";
      color = "var(--text-muted)";
    }

    if (label !== null) {
      statusEl.textContent = label;
      statusEl.style.color = color;
    }
  } catch (_ignored) { }
  setTimeout(pollStatus, 3000);
}
|
| 718 |
+
pollStatus();
|
| 719 |
+
|
| 720 |
+
// When the user picks a file: upload it, then either show the audio player
// or report why the upload failed.
uploadEl.addEventListener('change', async (ev) => {
  const file = ev.target.files && ev.target.files[0];
  if (!file) return;

  let res;
  try {
    res = await uploadFile(file);
  } catch (err) {
    // a rejected upload (network error, non-JSON reply) must still reach
    // the user instead of becoming an unhandled promise rejection
    console.error('upload failed', err);
    alert('Upload failed: ' + (err && err.message ? err.message : 'unknown'));
    return;
  }

  if (res && res.success) {
    createAudioPlayer(res.url, res.filename);
    live.innerHTML = '<div style="color:#bcbcbc;">Ready. Play audio to start live transcription.</div>';
  } else {
    alert('Upload failed: ' + (res && res.error ? res.error : 'unknown'));
  }
});
|
| 732 |
+
})();
|
| 733 |
+
</script>
|
| 734 |
+
</body>
|
| 735 |
+
|
| 736 |
+
</html>
|
templates/landing.html
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
+
<title>LiveTranscribe — Real-time Audio Transcription</title>
|
| 7 |
+
<meta name="description" content="LiveTranscribe: real-time microphone + uploaded audio transcription with speaker diarization and downloadable transcripts." />
|
| 8 |
+
<!-- Tailwind CDN for quick styling -->
|
| 9 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 10 |
+
<style>
|
| 11 |
+
/* small extra tweaks */
|
| 12 |
+
.feature-icon { width:48px; height:48px; }
|
| 13 |
+
</style>
|
| 14 |
+
</head>
|
| 15 |
+
<body class="bg-gray-50 text-gray-800 font-sans">
|
| 16 |
+
<header class="bg-white shadow">
|
| 17 |
+
<div class="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
|
| 18 |
+
<div class="flex items-center gap-3">
|
| 19 |
+
<div class="w-10 h-10 rounded-lg bg-gradient-to-tr from-indigo-500 to-purple-500 flex items-center justify-center text-white font-bold">LT</div>
|
| 20 |
+
<div>
|
| 21 |
+
<a href="#home" class="text-xl font-semibold">LiveTranscribe</a>
|
| 22 |
+
<div class="text-sm text-gray-500">Real-time transcription + diarization</div>
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
<nav class="hidden md:flex items-center gap-6 text-sm">
|
| 26 |
+
<a href="#features" class="hover:underline">Features</a>
|
| 27 |
+
<a href="#how" class="hover:underline">How it works</a>
|
| 28 |
+
<a href="#deploy" class="hover:underline">Deploy</a>
|
| 29 |
+
<a href="#try" class="px-4 py-2 rounded-lg bg-indigo-600 text-white">Get started</a>
|
| 30 |
+
</nav>
|
| 31 |
+
<div class="md:hidden">
|
| 32 |
+
<button id="menuBtn" class="px-3 py-2 bg-gray-100 rounded">Menu</button>
|
| 33 |
+
</div>
|
| 34 |
+
</div>
|
| 35 |
+
</header>
|
| 36 |
+
|
| 37 |
+
<main id="home" class="max-w-6xl mx-auto px-6 py-12">
|
| 38 |
+
<section class="grid md:grid-cols-2 gap-10 items-center">
|
| 39 |
+
<div>
|
| 40 |
+
<h1 class="text-4xl md:text-5xl font-extrabold leading-tight">Real-time transcription for live and uploaded audio</h1>
|
| 41 |
+
<p class="mt-4 text-gray-600">Record, chunk, diarize speakers and transcribe — all in real time. Useful for meetings, podcasts, lectures, and interviews.</p>
|
| 42 |
+
|
| 43 |
+
<div class="mt-6 flex gap-3">
|
| 44 |
+
<a href="/live" class="px-5 py-3 rounded-lg bg-indigo-600 text-white shadow">Open Live Recorder</a>
|
| 45 |
+
<a href="/upload" class="px-5 py-3 rounded-lg border border-gray-200">Upload & Live Transcribe</a>
|
| 46 |
+
</div>
|
| 47 |
+
|
| 48 |
+
<div class="mt-6 text-sm text-gray-500">
|
| 49 |
+
<strong>Note:</strong> The links above assume your Flask static templates are served at the project root. Adjust if your routes differ.
|
| 50 |
+
</div>
|
| 51 |
+
</div>
|
| 52 |
+
|
| 53 |
+
<!-- <div class="bg-white rounded-lg shadow p-6">
|
| 54 |
+
<h3 class="font-semibold">Quick demo</h3>
|
| 55 |
+
<ol class="mt-3 list-decimal list-inside text-gray-600 text-sm space-y-2">
|
| 56 |
+
<li>Open <code>/index2.html</code> to start the live recorder and see live transcription + diarization.</li>
|
| 57 |
+
<li>Open <code>/index2_upload.html</code> to upload an audio file and receive a streaming transcript.</li>
|
| 58 |
+
<li>Download transcripts or copy them from the web UI.</li>
|
| 59 |
+
</ol>
|
| 60 |
+
</div> -->
|
| 61 |
+
</section>
|
| 62 |
+
|
| 63 |
+
<section id="features" class="mt-14">
|
| 64 |
+
<h2 class="text-2xl font-bold">Features</h2>
|
| 65 |
+
<div class="grid md:grid-cols-3 gap-6 mt-6">
|
| 66 |
+
<article class="bg-white p-6 rounded-lg shadow-sm">
|
| 67 |
+
<img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%235a67d8' viewBox='0 0 24 24'><path d='M12 3v10c0 3.31-2.69 6-6 6H4v2h8v-2h-2c2.21 0 4-1.79 4-4V3h-6z'/></svg>" alt="microphone" />
|
| 68 |
+
<h4 class="mt-3 font-semibold">Live recording</h4>
|
| 69 |
+
<p class="text-sm text-gray-600 mt-2">Capture mic & system audio in short chunks (5s) and stream them to the transcriber.</p>
|
| 70 |
+
</article>
|
| 71 |
+
|
| 72 |
+
<article class="bg-white p-6 rounded-lg shadow-sm">
|
| 73 |
+
<img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%2337a6ff' viewBox='0 0 24 24'><path d='M12 2C8 2 5 5 5 9c0 4.42 7 13 7 13s7-8.58 7-13c0-4-3-7-7-7z'/></svg>" alt="upload" />
|
| 74 |
+
<h4 class="mt-3 font-semibold">Upload + stream</h4>
|
| 75 |
+
<p class="text-sm text-gray-600 mt-2">Users upload audio files and get live, incremental transcripts back in the browser.</p>
|
| 76 |
+
</article>
|
| 77 |
+
|
| 78 |
+
<article class="bg-white p-6 rounded-lg shadow-sm">
|
| 79 |
+
<img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%23f59e0b' viewBox='0 0 24 24'><path d='M12 12c2.21 0 4-1.79 4-4V4h-8v4c0 2.21 1.79 4 4 4z'/></svg>" alt="diarization" />
|
| 80 |
+
<h4 class="mt-3 font-semibold">Speaker diarization</h4>
|
| 81 |
+
<p class="text-sm text-gray-600 mt-2">Differentiate speakers in meetings so transcripts label speakers/segments automatically.</p>
|
| 82 |
+
</article>
|
| 83 |
+
</div>
|
| 84 |
+
</section>
|
| 85 |
+
|
| 86 |
+
<section id="how" class="mt-14">
|
| 87 |
+
<h2 class="text-2xl font-bold">How it works</h2>
|
| 88 |
+
<div class="mt-4 grid md:grid-cols-3 gap-6">
|
| 89 |
+
<div class="col-span-2 bg-white p-6 rounded-lg shadow-sm">
|
| 90 |
+
<h3 class="font-semibold">Architecture (high level)</h3>
|
| 91 |
+
<ol class="list-decimal list-inside text-gray-600 mt-3 space-y-2 text-sm">
|
| 92 |
+
<li>Client captures audio (microphone or upload) and sends 5s chunks to the server via WebSocket / fetch.</li>
|
| 93 |
+
<li>Server stores, optionally concatenates chunks and runs an ASR model (e.g. Whisper, faster-whisper) in streaming mode.</li>
|
| 94 |
+
<li>Speaker diarization runs (pyannote) to assign speaker labels to segments.</li>
|
| 95 |
+
<li>Transcribed segments + speaker labels are pushed to the client and shown live.</li>
|
| 96 |
+
<li>User can download the full transcript or copy segments.</li>
|
| 97 |
+
</ol>
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
<div class="bg-white p-6 rounded-lg shadow-sm">
|
| 101 |
+
<h3 class="font-semibold">Quick tips</h3>
|
| 102 |
+
<ul class="text-sm text-gray-600 mt-3 list-disc list-inside space-y-2">
|
| 103 |
+
<li>Tune chunk-length (5s is a good start) for latency vs accuracy tradeoff.</li>
|
| 104 |
+
<li>Run diarization asynchronously to avoid blocking transcription if you need lower latency.</li>
|
| 105 |
+
<li>Provide a "finalize" button so the server can merge chunks and run a final pass for improved accuracy.</li>
|
| 106 |
+
</ul>
|
| 107 |
+
</div>
|
| 108 |
+
</div>
|
| 109 |
+
</section>
|
| 110 |
+
|
| 111 |
+
<section id="deploy" class="mt-14">
|
| 112 |
+
<h2 class="text-2xl font-bold">Deploy & run</h2>
|
| 113 |
+
<div class="mt-4 bg-white p-6 rounded-lg shadow-sm text-sm text-gray-700">
|
| 114 |
+
<p>Typical steps to run locally or on a VM:</p>
|
| 115 |
+
<pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto"># 1. create venv
|
| 116 |
+
python -m venv .venv
|
| 117 |
+
source .venv/bin/activate # or .venv\Scripts\activate on Windows
|
| 118 |
+
|
| 119 |
+
# 2. install requirements
|
| 120 |
+
pip install -r requirements.txt
|
| 121 |
+
|
| 122 |
+
# 3. run app(s)
|
| 123 |
+
python app.py # live recorder
|
| 124 |
+
python app2.py # upload-based transcription
|
| 125 |
+
|
| 126 |
+
# 4. open in browser
|
| 127 |
+
http://localhost:5000/index2.html
|
| 128 |
+
http://localhost:5000/index2_upload.html
|
| 129 |
+
</pre>
|
| 130 |
+
<p class="mt-3">If you want a single Flask app with a landing page route, add this snippet to your Flask app:</p>
|
| 131 |
+
<pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto">@app.route('/')
|
| 132 |
+
def landing():
|
| 133 |
+
return render_template('landing.html')
|
| 134 |
+
</pre>
|
| 135 |
+
</div>
|
| 136 |
+
</section>
|
| 137 |
+
|
| 138 |
+
<section id="try" class="mt-14">
|
| 139 |
+
<h2 class="text-2xl font-bold">Try it now</h2>
|
| 140 |
+
<div class="mt-4 flex flex-col md:flex-row gap-4">
|
| 141 |
+
<a href="/live" class="px-4 py-3 rounded-lg bg-indigo-600 text-white">Open Live Recorder</a>
|
| 142 |
+
<a href="/upload" class="px-4 py-3 rounded-lg border">Upload & Transcribe</a>
|
| 143 |
+
<a href="#deploy" class="px-4 py-3 rounded-lg border">Deployment instructions</a>
|
| 144 |
+
</div>
|
| 145 |
+
</section>
|
| 146 |
+
|
| 147 |
+
<!-- <footer class="mt-16 text-center text-sm text-gray-500 pb-8">
|
| 148 |
+
Built with ❤️ — Add your logo, links, and analytics here.
|
| 149 |
+
</footer> -->
|
| 150 |
+
</main>
|
| 151 |
+
|
| 152 |
+
<script>
|
| 153 |
+
// tiny menu toggle for mobile
|
| 154 |
+
// The mobile "Menu" button has no real menu; it just points the user at the
// in-page section links.
const menuBtn = document.getElementById('menuBtn');
if (menuBtn) {
  menuBtn.addEventListener('click', function () {
    alert('Use the links on the page: Features, How it works, Deploy, Try it now');
  });
}
|
| 158 |
+
</script>
|
| 159 |
+
</body>
|
| 160 |
+
</html>
|
templates/test_index.html
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<title>Audio Recorder & Transcription UI</title>
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: 'Segoe UI', Arial, sans-serif;
|
| 10 |
+
background: linear-gradient(120deg, #f5f6fa 60%, #dbeafe 100%);
|
| 11 |
+
margin: 0;
|
| 12 |
+
padding: 0;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.container {
|
| 16 |
+
max-width: 750px;
|
| 17 |
+
margin: 40px auto;
|
| 18 |
+
background: #fff;
|
| 19 |
+
border-radius: 14px;
|
| 20 |
+
box-shadow: 0 4px 24px #0002;
|
| 21 |
+
padding: 32px 32px 24px 32px;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
h1 {
|
| 25 |
+
margin-top: 0;
|
| 26 |
+
font-size: 2.2em;
|
| 27 |
+
letter-spacing: 1px;
|
| 28 |
+
color: #2563eb;
|
| 29 |
+
text-align: center;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
label {
|
| 33 |
+
display: block;
|
| 34 |
+
margin-top: 18px;
|
| 35 |
+
font-weight: 600;
|
| 36 |
+
color: #334155;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
select,
|
| 40 |
+
input[type="number"] {
|
| 41 |
+
margin-top: 6px;
|
| 42 |
+
padding: 8px;
|
| 43 |
+
font-size: 1em;
|
| 44 |
+
border-radius: 6px;
|
| 45 |
+
border: 1px solid #cbd5e1;
|
| 46 |
+
background: #f1f5f9;
|
| 47 |
+
width: 100%;
|
| 48 |
+
box-sizing: border-box;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
button {
|
| 52 |
+
margin-top: 12px;
|
| 53 |
+
margin-right: 10px;
|
| 54 |
+
padding: 10px 22px;
|
| 55 |
+
font-size: 1em;
|
| 56 |
+
font-weight: 600;
|
| 57 |
+
border: none;
|
| 58 |
+
border-radius: 6px;
|
| 59 |
+
background: #2563eb;
|
| 60 |
+
color: #fff;
|
| 61 |
+
cursor: pointer;
|
| 62 |
+
transition: background 0.2s;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
button:disabled {
|
| 66 |
+
background: #94a3b8;
|
| 67 |
+
cursor: not-allowed;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.stop-btn {
|
| 71 |
+
background: #dc2626;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.status {
|
| 75 |
+
margin-top: 18px;
|
| 76 |
+
font-weight: bold;
|
| 77 |
+
color: #0ea5e9;
|
| 78 |
+
text-align: center;
|
| 79 |
+
font-size: 1.1em;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.live {
|
| 83 |
+
margin-top: 32px;
|
| 84 |
+
background: #f1f5f9;
|
| 85 |
+
border-radius: 8px;
|
| 86 |
+
padding: 18px 18px 10px 18px;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.live h2 {
|
| 90 |
+
margin-top: 0;
|
| 91 |
+
color: #0ea5e9;
|
| 92 |
+
font-size: 1.2em;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.chunk {
|
| 96 |
+
background: #e0e7ef;
|
| 97 |
+
margin-bottom: 8px;
|
| 98 |
+
padding: 8px 12px;
|
| 99 |
+
border-radius: 5px;
|
| 100 |
+
font-size: 1em;
|
| 101 |
+
color: #334155;
|
| 102 |
+
box-shadow: 0 1px 2px #0001;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
.files {
|
| 106 |
+
margin-top: 32px;
|
| 107 |
+
background: #f1f5f9;
|
| 108 |
+
border-radius: 8px;
|
| 109 |
+
padding: 18px 18px 10px 18px;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.files h2 {
|
| 113 |
+
margin-top: 0;
|
| 114 |
+
color: #2563eb;
|
| 115 |
+
font-size: 1.2em;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.file {
|
| 119 |
+
background: #e0e7ef;
|
| 120 |
+
margin-bottom: 8px;
|
| 121 |
+
padding: 8px 12px;
|
| 122 |
+
border-radius: 5px;
|
| 123 |
+
font-size: 1em;
|
| 124 |
+
color: #334155;
|
| 125 |
+
display: flex;
|
| 126 |
+
align-items: center;
|
| 127 |
+
justify-content: space-between;
|
| 128 |
+
box-shadow: 0 1px 2px #0001;
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
.file a {
|
| 132 |
+
color: #2563eb;
|
| 133 |
+
text-decoration: none;
|
| 134 |
+
font-weight: 500;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.file a:hover {
|
| 138 |
+
text-decoration: underline;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.footer {
|
| 142 |
+
margin-top: 36px;
|
| 143 |
+
text-align: center;
|
| 144 |
+
color: #64748b;
|
| 145 |
+
font-size: 0.95em;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
@media (max-width: 600px) {
|
| 149 |
+
.container {
|
| 150 |
+
padding: 12px 4vw 12px 4vw;
|
| 151 |
+
}
|
| 152 |
+
}
|
| 153 |
+
</style>
|
| 154 |
+
</head>
|
| 155 |
+
|
| 156 |
+
<body>
|
| 157 |
+
<div class="container">
|
| 158 |
+
<h1>Audio Recorder & Transcription</h1>
|
| 159 |
+
<div>
|
| 160 |
+
<label for="mic">Microphone Device</label>
|
| 161 |
+
<select id="mic" disabled>
|
| 162 |
+
<option value="1" selected>Microphone Device (#1)</option>
|
| 163 |
+
</select>
|
| 164 |
+
|
| 165 |
+
<label for="sys">System/Loopback Device (optional)</label>
|
| 166 |
+
<select id="sys" disabled>
|
| 167 |
+
<option value="16" selected>System Loopback Device (#16)</option>
|
| 168 |
+
</select>
|
| 169 |
+
|
| 170 |
+
<label for="chunk_secs">Chunk Length (seconds)</label>
|
| 171 |
+
<input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
|
| 172 |
+
|
| 173 |
+
<label for="model">Transcription Model</label>
|
| 174 |
+
<select id="model" disabled>
|
| 175 |
+
<option value="small">small</option>
|
| 176 |
+
<option value="medium" selected>medium</option>
|
| 177 |
+
<option value="large">large</option>
|
| 178 |
+
</select>
|
| 179 |
+
|
| 180 |
+
<div style="margin-top:18px; text-align:center;">
|
| 181 |
+
<button id="startBtn">Start Recording</button>
|
| 182 |
+
<button id="stopBtn" class="stop-btn" disabled>Stop Recording</button>
|
| 183 |
+
</div>
|
| 184 |
+
</div>
|
| 185 |
+
|
| 186 |
+
<div class="status" id="status"></div>
|
| 187 |
+
|
| 188 |
+
<div class="live">
|
| 189 |
+
<h2>Live Transcription</h2>
|
| 190 |
+
<div id="live"></div>
|
| 191 |
+
</div>
|
| 192 |
+
|
| 193 |
+
<div class="files">
|
| 194 |
+
<h2>Final Files</h2>
|
| 195 |
+
<div id="files"></div>
|
| 196 |
+
</div>
|
| 197 |
+
<div class="footer">
|
| 198 |
+
© 2025 Audio Multi-Transcript UI · Powered by Flask + PyAudio + Whisper
|
| 199 |
+
</div>
|
| 200 |
+
</div>
|
| 201 |
+
|
| 202 |
+
<script>
|
| 203 |
+
// --- Start/Stop Recording ---
|
| 204 |
+
let polling = null;
|
| 205 |
+
// Start button: ask the server to begin recording with the fixed settings
// shown in the form, then begin polling for status and live segments.
document.getElementById('startBtn').onclick = async function () {
  const mic = 1; // static value
  const sys = 16; // static value
  const chunk_secs = 5; // static value
  const model = "medium"; // static value
  const no_transcribe = false;

  const startBtn = document.getElementById('startBtn');
  const stopBtn = document.getElementById('stopBtn');
  const statusBox = document.getElementById('status');

  statusBox.textContent = 'Starting...';
  // disable before the request so a double click cannot start twice
  startBtn.disabled = true;

  try {
    const resp = await fetch('/api/start-recording', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
    });
    if (!resp.ok) {
      // Keep going: the status poll below resets the buttons if the server
      // reports that nothing is recording.
      console.warn('start-recording returned HTTP ' + resp.status);
    }
  } catch (e) {
    // network-level failure: nothing was started, so restore the UI
    console.error('start-recording failed', e);
    statusBox.textContent = 'Failed to start recording';
    startBtn.disabled = false;
    return;
  }

  stopBtn.disabled = false;
  pollStatus();
};
|
| 221 |
+
|
| 222 |
+
// Stop button: ask the server to stop recording, halt status polling, and
// refresh the final-files list two seconds later.
document.getElementById('stopBtn').onclick = async function () {
  await fetch('/api/stop-recording', { method: 'POST' });

  const byId = (id) => document.getElementById(id);
  byId('status').textContent = 'Stopping...';
  byId('stopBtn').disabled = true;

  if (polling) {
    clearInterval(polling);
  }

  // wait 2s before listing files and re-enabling the start button
  function afterStop() {
    loadFiles();
    byId('startBtn').disabled = false;
  }
  setTimeout(afterStop, 2000);
};
|
| 229 |
+
|
| 230 |
+
// --- Poll status ---
|
| 231 |
+
// Poll /api/recording-status once per second. Each tick updates the status
// line, re-renders the last 10 live segments, and, once the server reports
// that recording has stopped, ends polling, resets the buttons and reloads
// the file list.
function pollStatus() {
  // never let two pollers run at once
  if (polling) clearInterval(polling);

  polling = setInterval(async () => {
    try {
      const res = await fetch('/api/recording-status');
      const data = await res.json();
      document.getElementById('status').textContent = data.recording ? 'Recording...' : 'Idle';

      // --- Show live transcription ---
      const liveDiv = document.getElementById('live');
      liveDiv.innerHTML = '';
      if (data.live_segments && data.live_segments.length) {
        data.live_segments.slice(-10).forEach(seg => {
          const div = document.createElement('div');
          div.className = 'chunk';
          // Segment fields come from the server, so insert them as text
          // nodes rather than parsing them as HTML.
          const who = document.createElement('b');
          who.textContent = `${seg.speaker || 'Speaker'}:`;
          div.appendChild(who);
          div.appendChild(document.createTextNode(
            ` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.text}`
          ));
          liveDiv.appendChild(div);
        });
      } else {
        liveDiv.textContent = 'No transcription yet.';
      }

      if (!data.recording) {
        clearInterval(polling);
        document.getElementById('startBtn').disabled = false;
        document.getElementById('stopBtn').disabled = true;
        loadFiles();
      }
    } catch (e) {
      // one failed poll should not raise an unhandled rejection; the next
      // tick simply tries again
      console.warn('status poll failed', e);
    }
  }, 1000);
}
|
| 259 |
+
// Helper to format time
|
| 260 |
+
// Format a number of seconds as "m:ss" (seconds zero-padded to two digits).
// null/undefined are shown as "0:00".
function formatTime(s) {
  if (s === null || s === undefined) {
    return "0:00";
  }
  const minutes = Math.floor(s / 60);
  const seconds = String(Math.floor(s % 60)).padStart(2, "0");
  return minutes + ":" + seconds;
}
|
| 266 |
+
// --- Load final files ---
|
| 267 |
+
// Fetch /api/final-files and render one row per file (name plus a download
// link) into #files. Shows a placeholder when there are no files and an
// error message when the request or the response shape is bad.
async function loadFiles() {
  const filesDiv = document.getElementById('files');
  filesDiv.innerHTML = '';
  try {
    const res = await fetch('/api/final-files');
    const data = await res.json();

    // Clear again after the await: another loadFiles() call may have
    // rendered in the meantime, which would otherwise duplicate rows.
    filesDiv.innerHTML = '';

    if (!data.files.length) {
      filesDiv.textContent = 'No files yet.';
      return;
    }
    data.files.forEach(f => {
      const div = document.createElement('div');
      div.className = 'file';

      // File names and URLs come from the server, so set them through DOM
      // properties rather than parsing them as HTML.
      const label = document.createElement('span');
      label.textContent = f.name;

      const link = document.createElement('a');
      link.href = f.url || f.path;
      link.target = '_blank';
      link.rel = 'noopener';
      link.textContent = 'Download';

      div.appendChild(label);
      div.appendChild(document.createTextNode(' '));
      div.appendChild(link);
      filesDiv.appendChild(div);
    });
  } catch (e) {
    filesDiv.textContent = 'Error loading files.';
  }
}
|
| 287 |
+
|
| 288 |
+
// --- On load ---
|
| 289 |
+
loadFiles();
|
| 290 |
+
</script>
|
| 291 |
+
</body>
|
| 292 |
+
</html>
|
templates/test_index3.html
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Audio Transcription Studio</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
|
| 11 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
|
| 12 |
+
<style>
|
| 13 |
+
body {
|
| 14 |
+
font-family: 'Inter', sans-serif;
|
| 15 |
+
background-color: #1a1a2e; /* Dark purple background */
|
| 16 |
+
color: #ffffff;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
.container-bg {
|
| 20 |
+
background-color: #2c2c44; /* Slightly lighter purple for containers */
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.panel-bg {
|
| 24 |
+
background-color: #22223b; /* Darker panel background */
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
.input-field {
|
| 28 |
+
background-color: #3b3b55;
|
| 29 |
+
border: 1px solid #4a4a6b;
|
| 30 |
+
color: #e0e0e0;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.button-glow {
|
| 34 |
+
box-shadow: 0 0 10px 2px #6a1b9a;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.glow-text {
|
| 38 |
+
text-shadow: 0 0 8px #d1c4e9;
|
| 39 |
+
}
|
| 40 |
+
</style>
|
| 41 |
+
</head>
|
| 42 |
+
<body class="flex items-center justify-center min-h-screen p-8">
|
| 43 |
+
<div class="w-full max-w-6xl">
|
| 44 |
+
<!-- Main Header -->
|
| 45 |
+
<header class="text-center mb-10">
|
| 46 |
+
<h1 class="text-5xl font-extrabold text-[#d1c4e9] glow-text mb-2">Audio Transcription Studio</h1>
|
| 47 |
+
<p class="text-lg text-gray-400">Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.</p>
|
| 48 |
+
</header>
|
| 49 |
+
|
| 50 |
+
<!-- Main Content Grid -->
|
| 51 |
+
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
|
| 52 |
+
<!-- Left Panel: Live Transcription -->
|
| 53 |
+
<div class="lg:col-span-2 panel-bg p-8 rounded-2xl shadow-xl">
|
| 54 |
+
<h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-file-alt mr-2"></i> Live Transcription</h2>
|
| 55 |
+
|
| 56 |
+
<!-- Recording Status & Button -->
|
| 57 |
+
<div id="recording-status-area" class="flex flex-col items-center justify-center p-6 mb-8">
|
| 58 |
+
<div id="status-spinner" class="relative w-32 h-32 hidden">
|
| 59 |
+
<div class="absolute inset-0 border-4 border-purple-500 rounded-full animate-ping"></div>
|
| 60 |
+
<div class="absolute inset-4 border-4 border-purple-400 rounded-full animate-ping delay-200"></div>
|
| 61 |
+
<div class="absolute inset-8 border-4 border-purple-300 rounded-full animate-ping delay-400"></div>
|
| 62 |
+
<div class="flex items-center justify-center h-full w-full">
|
| 63 |
+
<i class="fas fa-microphone text-4xl text-white"></i>
|
| 64 |
+
</div>
|
| 65 |
+
</div>
|
| 66 |
+
<div id="status-icon" class="relative w-32 h-32 flex items-center justify-center bg-purple-600 rounded-full">
|
| 67 |
+
<i class="fas fa-microphone text-4xl text-white"></i>
|
| 68 |
+
</div>
|
| 69 |
+
<p id="status-text" class="mt-4 text-green-400 font-semibold text-lg">Ready to record</p>
|
| 70 |
+
<div id="start-stop-buttons" class="mt-4">
|
| 71 |
+
<button id="start-btn" class="bg-purple-600 hover:bg-purple-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 button-glow disabled:opacity-50 disabled:cursor-not-allowed">
|
| 72 |
+
Start Recording
|
| 73 |
+
</button>
|
| 74 |
+
<button id="stop-btn" class="bg-red-600 hover:bg-red-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 disabled:opacity-50 disabled:cursor-not-allowed hidden">
|
| 75 |
+
Stop Recording
|
| 76 |
+
</button>
|
| 77 |
+
</div>
|
| 78 |
+
</div>
|
| 79 |
+
|
| 80 |
+
<!-- Live Transcription Display -->
|
| 81 |
+
<div id="live-transcription" class="bg-[#1b1b2a] p-6 rounded-lg h-96 overflow-y-auto border border-[#3b3b55]">
|
| 82 |
+
<p class="text-gray-400 text-center text-lg mt-12">Start recording to see live transcription</p>
|
| 83 |
+
</div>
|
| 84 |
+
</div>
|
| 85 |
+
|
| 86 |
+
<!-- Right Panel: Recording Settings & Files -->
|
| 87 |
+
<div class="lg:col-span-1 space-y-8">
|
| 88 |
+
<!-- Recording Settings Panel -->
|
| 89 |
+
<div class="panel-bg p-8 rounded-2xl shadow-xl">
|
| 90 |
+
<h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-cogs mr-2"></i> Recording Settings</h2>
|
| 91 |
+
<div class="space-y-6">
|
| 92 |
+
<!-- Microphone Device -->
|
| 93 |
+
<div>
|
| 94 |
+
<label for="mic-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-microphone mr-2"></i>Microphone Device</label>
|
| 95 |
+
<select id="mic-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
|
| 96 |
+
<option value="">Loading devices...</option>
|
| 97 |
+
</select>
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
<!-- System Audio -->
|
| 101 |
+
<div>
|
| 102 |
+
<label for="sys-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-desktop mr-2"></i>System Audio (Optional)</label>
|
| 103 |
+
<select id="sys-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
|
| 104 |
+
<option value="null">None</option>
|
| 105 |
+
</select>
|
| 106 |
+
</div>
|
| 107 |
+
|
| 108 |
+
<!-- Chunk Length -->
|
| 109 |
+
<div>
|
| 110 |
+
<label for="chunk-secs-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-clock mr-2"></i>Chunk Length (seconds)</label>
|
| 111 |
+
<input type="number" id="chunk-secs-input" value="5" min="1" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
|
| 112 |
+
</div>
|
| 113 |
+
|
| 114 |
+
<!-- Transcription Model -->
|
| 115 |
+
<div>
|
| 116 |
+
<label for="model-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-brain mr-2"></i>Transcription Model</label>
|
| 117 |
+
<select id="model-input" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
|
| 118 |
+
<option value="medium">Medium (Balanced)</option>
|
| 119 |
+
<option value="small">Small</option>
|
| 120 |
+
<option value="large">Large</option>
|
| 121 |
+
</select>
|
| 122 |
+
</div>
|
| 123 |
+
|
| 124 |
+
<!-- Disable Transcription Toggle -->
|
| 125 |
+
<div class="flex items-center">
|
| 126 |
+
<input id="no-transcribe-checkbox" type="checkbox" class="h-5 w-5 text-purple-600 focus:ring-purple-500 rounded border-gray-600 bg-gray-700">
|
| 127 |
+
<label for="no-transcribe-checkbox" class="ml-2 block text-sm text-gray-300">Disable Transcription</label>
|
| 128 |
+
</div>
|
| 129 |
+
</div>
|
| 130 |
+
</div>
|
| 131 |
+
|
| 132 |
+
<!-- Recording Files Panel -->
|
| 133 |
+
<div class="panel-bg p-8 rounded-2xl shadow-xl">
|
| 134 |
+
<h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-folder-open mr-2"></i> Recording Files</h2>
|
| 135 |
+
<div id="final-files-list" class="space-y-2 text-gray-300">
|
| 136 |
+
<p class="text-gray-500">No files yet...</p>
|
| 137 |
+
</div>
|
| 138 |
+
</div>
|
| 139 |
+
</div>
|
| 140 |
+
</div>
|
| 141 |
+
</div>
|
| 142 |
+
|
| 143 |
+
<script>
|
| 144 |
+
// Look up, once, every DOM element the script reads or updates.
// The id list and the destructuring list below are positional: keep them in the same order.
const [
    micSelect,
    sysSelect,
    chunkSecsInput,
    modelInput,
    noTranscribeCheckbox,
    startBtn,
    stopBtn,
    statusText,
    liveTranscription,
    finalFilesList,
    statusIcon,
    statusSpinner,
] = [
    'mic-select',
    'sys-select',
    'chunk-secs-input',
    'model-input',
    'no-transcribe-checkbox',
    'start-btn',
    'stop-btn',
    'status-text',
    'live-transcription',
    'final-files-list',
    'status-icon',
    'status-spinner',
].map((elementId) => document.getElementById(elementId));

// Timer handle of the status poll; stays undefined until polling is started.
let statusPollingInterval;
|
| 158 |
+
|
| 159 |
+
// Fetch available audio devices and populate the dropdowns
|
| 160 |
+
/**
 * Load the audio devices from `/api/devices` and fill both dropdowns.
 *
 * The microphone dropdown lists every returned device (value = device index,
 * label = device name) and pre-selects the first one. The system-audio dropdown
 * lists the same devices after a leading "None" entry whose value is "null".
 * If the request fails, returns a non-OK status, or the payload has no
 * `devices` array, both dropdowns show a single "Error loading devices" entry.
 *
 * @returns {Promise<void>}
 */
async function fetchDevices() {
    // Options are built through the DOM so a device name is always rendered
    // as plain text and can never be parsed as markup.
    const makeOption = (value, label) => {
        const option = document.createElement('option');
        option.value = value;
        option.textContent = label;
        return option;
    };

    try {
        const response = await fetch('/api/devices');
        if (!response.ok) {
            throw new Error(`Device request failed with status ${response.status}`);
        }

        const data = await response.json();
        if (!Array.isArray(data.devices)) {
            throw new Error('Device response did not contain a "devices" array');
        }
        const devices = data.devices;

        if (devices.length > 0) {
            micSelect.replaceChildren(
                ...devices.map((device) => makeOption(device.index, device.name))
            );
            micSelect.value = devices[0].index;
        } else {
            // Keep the dropdown readable instead of leaving it completely empty.
            micSelect.replaceChildren(makeOption('', 'No devices found'));
        }

        // Each dropdown needs its own option elements, so the list is built a second time.
        sysSelect.replaceChildren(
            makeOption('null', 'None'),
            ...devices.map((device) => makeOption(device.index, device.name))
        );
    } catch (error) {
        console.error('Error fetching devices:', error);
        micSelect.innerHTML = `<option>Error loading devices</option>`;
        sysSelect.innerHTML = `<option>Error loading devices</option>`;
    }
}
|
| 180 |
+
|
| 181 |
+
// Fetch final files and display them
|
| 182 |
+
/**
 * Load the list of finished recordings from `/api/final-files` and render it
 * into the "Recording Files" panel.
 *
 * Each file becomes a link (opened in a new tab) labelled with the file name.
 * An empty list shows the "No files yet..." placeholder; a failed request, a
 * non-OK status, or a payload without a `files` array shows "Error loading files.".
 *
 * @returns {Promise<void>}
 */
async function fetchFinalFiles() {
    try {
        const response = await fetch('/api/final-files');
        if (!response.ok) {
            throw new Error(`File list request failed with status ${response.status}`);
        }

        const data = await response.json();
        if (!Array.isArray(data.files)) {
            throw new Error('File list response did not contain a "files" array');
        }

        if (data.files.length === 0) {
            finalFilesList.innerHTML = `<p class="text-gray-500">No files yet...</p>`;
            return;
        }

        // Links are assembled as DOM nodes so file names and URLs are never
        // interpreted as HTML.
        const links = data.files.map((file) => {
            const link = document.createElement('a');
            link.href = file.url;
            link.className = 'flex items-center text-purple-400 hover:text-purple-300 transition-colors duration-200';
            link.target = '_blank';
            // Prevent the newly opened tab from reaching back into this page.
            link.rel = 'noopener';

            const icon = document.createElement('i');
            icon.className = 'fas fa-file-waveform mr-2';

            const label = document.createElement('span');
            label.textContent = file.name;

            link.append(icon, label);
            return link;
        });
        finalFilesList.replaceChildren(...links);
    } catch (error) {
        console.error('Error fetching final files:', error);
        finalFilesList.innerHTML = `<p class="text-red-500">Error loading files.</p>`;
    }
}
|
| 201 |
+
|
| 202 |
+
// Poll the server for recording status and live segments
|
| 203 |
+
/**
 * Poll `/api/recording-status` once per second and mirror the result in the UI.
 *
 * While the server reports `recording`, the status text and spinner are shown,
 * the live transcription panel is rebuilt from `live_segments`, and the file
 * list is refreshed. As soon as the server reports that recording has ended,
 * the UI is reset to "Ready to record", the Start button is shown again and
 * polling stops. Polling also stops on any request or parsing error.
 */
function startStatusPolling() {
    // Drop any timer left over from an earlier call so intervals never stack.
    clearInterval(statusPollingInterval);

    statusPollingInterval = setInterval(async () => {
        try {
            const response = await fetch('/api/recording-status');
            if (!response.ok) {
                throw new Error(`Status request failed with status ${response.status}`);
            }
            const data = await response.json();

            if (data.recording) {
                statusText.textContent = 'Recording...';
                statusText.classList.remove('text-green-400');
                statusText.classList.add('text-purple-400');
                statusIcon.classList.add('hidden');
                statusSpinner.classList.remove('hidden');

                // A missing segment list is treated the same as an empty one.
                const segments = Array.isArray(data.live_segments) ? data.live_segments : [];

                liveTranscription.innerHTML = '';
                if (segments.length === 0) {
                    liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Recording started. Waiting for transcription...</p>`;
                } else {
                    segments.forEach((segment) => {
                        const line = document.createElement('p');
                        line.className = 'text-gray-200 mb-1 leading-snug';

                        // Speaker and transcript are inserted as text nodes so
                        // characters such as "<" or "&" are displayed verbatim
                        // and never parsed as markup.
                        const speaker = document.createElement('span');
                        speaker.className = 'font-semibold text-purple-300';
                        speaker.textContent = `${segment.speaker}:`;

                        line.append(speaker, ` ${segment.text}`);
                        liveTranscription.appendChild(line);
                    });
                    // Keep the newest segment in view.
                    liveTranscription.scrollTop = liveTranscription.scrollHeight;
                }
                fetchFinalFiles();
            } else {
                statusText.textContent = 'Ready to record';
                statusText.classList.remove('text-purple-400');
                statusText.classList.add('text-green-400');
                statusIcon.classList.remove('hidden');
                statusSpinner.classList.add('hidden');
                clearInterval(statusPollingInterval);
                startBtn.classList.remove('hidden');
                stopBtn.classList.add('hidden');
                fetchFinalFiles();
            }
        } catch (error) {
            console.error('Error polling status:', error);
            clearInterval(statusPollingInterval);
        }
    }, 1000);
}
|
| 247 |
+
|
| 248 |
+
// Start recording
|
| 249 |
+
// Start recording: send the current settings to `/api/start-recording`.
// On success the Start button is swapped for the Stop button and status
// polling begins; on failure the user is told why.
startBtn.addEventListener('click', async () => {
    const payload = {
        mic: micSelect.value,
        // The "None" entry carries the string "null"; send a real null instead.
        sys: sysSelect.value === 'null' ? null : sysSelect.value,
        chunk_secs: chunkSecsInput.value,
        model: modelInput.value,
        no_transcribe: noTranscribeCheckbox.checked,
    };

    // Block repeated clicks while the request is in flight so the server
    // never receives two start requests for one recording.
    startBtn.disabled = true;
    try {
        const response = await fetch('/api/start-recording', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload),
        });

        if (response.ok) {
            startBtn.classList.add('hidden');
            stopBtn.classList.remove('hidden');
            liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Starting recording...</p>`;
            startStatusPolling();
        } else {
            // The error body may be missing or not JSON; fall back to the
            // HTTP status so the alert never reads "Error: undefined".
            let message = `HTTP ${response.status}`;
            try {
                const errorBody = await response.json();
                if (errorBody && errorBody.error) {
                    message = errorBody.error;
                }
            } catch (parseError) {
                console.error('Could not parse error response:', parseError);
            }
            alert(`Error: ${message}`);
        }
    } catch (error) {
        console.error('Failed to start recording:', error);
        alert('Failed to start recording. Check server connection.');
    } finally {
        startBtn.disabled = false;
    }
});
|
| 277 |
+
|
| 278 |
+
// Stop recording
|
| 279 |
+
// Stop recording: ask the server to stop via `/api/stop-recording`.
// On success nothing else happens here; the status poll resets the UI once
// the server reports that recording has ended. Failures are reported to the
// user instead of being silently ignored.
stopBtn.addEventListener('click', async () => {
    // Block repeated clicks while the request is in flight.
    stopBtn.disabled = true;
    try {
        const response = await fetch('/api/stop-recording', {
            method: 'POST',
        });
        if (!response.ok) {
            console.error('Failed to stop recording: server responded with status', response.status);
            alert('Failed to stop recording. The server rejected the request.');
        }
    } catch (error) {
        console.error('Failed to stop recording:', error);
        alert('Failed to stop recording. Check server connection.');
    } finally {
        stopBtn.disabled = false;
    }
});
|
| 291 |
+
|
| 292 |
+
// Initial setup on page load
|
| 293 |
+
// When the page has finished loading, request the device list and the list
// of recorded files so both panels are populated.
window.onload = function initialisePage() {
    fetchDevices();
    fetchFinalFiles();
};
|
| 297 |
+
|
| 298 |
+
</script>
|
| 299 |
+
</body>
|
| 300 |
+
</html>
|