Spaces:

prthm11
/

AudioTransDiar

Sleeping

App Files Files Community

prthm11 committed on Sep 2, 2025

Commit

4207399

verified ·

1 Parent(s): 105dda6

Upload 12 files

Browse files

Files changed (12) hide show

Dockerfile +12 -0
app.py +312 -0
app2.py +291 -0
merged.py +559 -0
rec_transcribe_extension.py +345 -0
requirements.txt +171 -0
static/icon_upload.png +0 -0
templates/index2.html +753 -0
templates/index2_upload.html +736 -0
templates/landing.html +160 -0
templates/test_index.html +292 -0
templates/test_index3.html +300 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.12.2
+WORKDIR /app
+# Copy only the dependency manifest first so the pip layer below stays cached
+# until requirements.txt itself changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application source after the dependencies are installed.
+COPY . .
+EXPOSE 7860
+CMD ["python", "merged.py"]

app.py ADDED Viewed

	@@ -0,0 +1,312 @@

+from flask import Flask, request, jsonify, send_from_directory, render_template
+import threading
+import time
+import os
+import queue
+from pathlib import Path
+import pyaudio
+from werkzeug.utils import secure_filename
+from rec_transcribe_extension import Transcriber, diarization_hook
+from rec_transcribe_extension import (
+    list_input_devices,
+    run_recording,
+    OUTPUT_DIR,
+    CHUNKS_DIR,
+    FINAL_WAV,)
+app = Flask(__name__)
+recording_thread = None
+recording_running = False
+recording_status = {
+    "recording": False,
+    "live_segments": []
+}
+# ------ Device Listing API ------
+@app.route("/api/devices", methods=["GET"])
+def api_devices():
+    pa = pyaudio.PyAudio()
+    devices = []
+    for i in range(pa.get_device_count()):
+        dev = pa.get_device_info_by_index(i)
+        if dev.get("maxInputChannels", 0) > 0:
+            devices.append({"index": dev["index"], "name": dev["name"]})
+    pa.terminate()
+    return jsonify({"devices": devices})
+# --- Start recording ---
+@app.route("/api/start-recording", methods=["POST"])
+def api_start_recording():
+    global recording_thread, stop_event, recording_status
+    data = request.json
+    # Validate required fields
+    try:
+        mic = int(data.get("mic"))
+    except Exception:
+        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
+    # sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None
+    sys = None
+    if data.get("sys") not in (None, "", "null"):
+        try:
+            sys = int(data.get("sys"))
+        except Exception:
+            return jsonify({"error": "Invalid 'sys' parameter"}), 400
+    chunk_secs = int(data.get("chunk_secs", 5))
+    model = data.get("model", "medium")
+    no_transcribe = bool(data.get("no_transcribe", False))
+    if recording_status["recording"]:
+        return jsonify({"error": "Already recording"}), 400
+    # --- Validate that requested devices exist and have input channels ---
+    try:
+        pa = pyaudio.PyAudio()
+    except Exception as e:
+        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
+    def device_is_valid(device_index):
+        try:
+            dev = pa.get_device_info_by_index(device_index)
+            return dev.get("maxInputChannels", 0) > 0
+        except Exception:
+            return False
+    if not device_is_valid(mic):
+        pa.terminate()
+        return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
+    if sys is not None and not device_is_valid(sys):
+        pa.terminate()
+        return jsonify({"error": f"System device index {sys} not found or has no input channels"}), 400
+    pa.terminate()
+    # Reset state
+    recording_status["recording"] = True
+    recording_status["live_segments"] = []
+    stop_event = threading.Event()
+    def run():
+        # Patch: update live_segments after each chunk
+        from rec_transcribe_extension import chunk_writer_and_transcribe_worker
+        # Monkey-patch chunk_writer_and_transcribe_worker to update live_segments
+        import rec_transcribe_extension as rte
+        orig_worker = rte.chunk_writer_and_transcribe_worker
+        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
+            while True:
+                try:
+                    filename, frames = in_queue.get(timeout=1.0)
+                except queue.Empty:
+                    if stop_event.is_set() and in_queue.empty():
+                        break
+                    continue
+                rte.save_wav_from_frames(
+                    filename, frames, nchannels=rte.CHANNELS)
+                final_frames_list.extend(frames)
+                diar = rte.diarization_hook(str(filename))
+                diar_segments = diar if diar else []
+                # Transcribe chunk and get segments with timestamps
+                if transcriber and transcriber.model:
+                    try:
+                        segments, info = transcriber.model.transcribe(
+                            str(filename), beam_size=5)
+                        for seg in segments:
+                            seg_start = seg.start
+                            seg_end = seg.end
+                            seg_text = seg.text.strip()
+                            speaker = "Unknown"
+                            for d_start, d_end, d_speaker in diar_segments:
+                                if (seg_start < d_end) and (seg_end > d_start):
+                                    speaker = d_speaker
+                                    break
+                            # Update live_segments for frontend
+                            recording_status["live_segments"].append({
+                                "start": float(seg_start),
+                                "end": float(seg_end),
+                                "speaker": str(speaker),
+                                "text": seg_text
+                            })
+                            # Write to transcript file as before
+                            line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
+                            with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                                tf.write(line)
+                    except Exception as e:
+                        print(f"Transcription error for {filename.name}: {e}")
+            print("Chunk writer/transcriber worker exiting.")
+        rte.chunk_writer_and_transcribe_worker = patched_worker
+        try:
+            rte.stop_event = stop_event
+            run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
+                          model_name=model, no_transcribe=no_transcribe)
+        finally:
+            rte.chunk_writer_and_transcribe_worker = orig_worker
+            recording_status["recording"] = False
+    recording_thread = threading.Thread(target=run, daemon=True)
+    recording_thread.start()
+    return jsonify({"ok": True})
+# --- Stop recording ---
+@app.route("/api/stop-recording", methods=["POST"])
+def api_stop_recording():
+    global stop_event
+    if stop_event:
+        stop_event.set()
+    return jsonify({"ok": True})
+# --- Poll status ---
+@app.route("/api/recording-status")
+def api_recording_status():
+    return jsonify(recording_status)
+# # serve saved uploads at /uploads/<filename>
+# @app.route('/uploads/<path:filename>')
+# def serve_uploaded(filename):
+#     return send_from_directory(str(OUTPUT_DIR), filename)
+# # --- upload pre-recorded files ---
+# @app.route("/api/upload", methods=["POST"])
+# def api_upload_file():
+#     """
+#     Accept a single file (form-data 'file'), save it into OUTPUT_DIR and return json
+#     { ok: True, filename: "<saved_name>", url: "/static/<saved_name>" }.
+#     """
+#     if 'file' not in request.files:
+#         return jsonify({"error": "No file provided"}), 400
+#     f = request.files['file']
+#     if f.filename == '':
+#         return jsonify({"error": "Empty filename"}), 400
+#     safe_name = secure_filename(f.filename)
+#     # prefix timestamp to avoid collisions
+#     ts = int(time.time() * 1000)
+#     saved_name = f"{ts}_{safe_name}"
+#     saved_path = OUTPUT_DIR / saved_name
+#     try:
+#         f.save(str(saved_path))
+#     except Exception as e:
+#         return jsonify({"error": f"Failed to save file: {e}"}), 500
+#     return jsonify({"ok": True, "filename": saved_name, "url": f"/static/{saved_name}"})
+# # --- Start server-side paced transcription for a saved WAV/MP3 file ---
+# @app.route("/api/start-transcribe-file", methods=["POST"])
+# def api_start_transcribe_file():
+#     """
+#     POST JSON { filename: "<saved_name>" }
+#     Spawns a background thread that transcribes the file using the Transcriber,
+#     and appends transcribed segments (with start/end/speaker/text) into
+#     recording_status["live_segments"] while setting recording_status["recording"]=True.
+#     The worker will pace segments to approximate 'live' streaming using seg.start timestamps.
+#     """
+#     global recording_status
+#     data = request.json or {}
+#     filename = data.get("filename")
+#     print("DEBUG: /api/start-transcribe-file called with:", filename, flush=True)
+#     if not filename:
+#         return jsonify({"error": "Missing 'filename'"}), 400
+#     file_path = OUTPUT_DIR / filename
+#     if not file_path.exists():
+#         return jsonify({"error": "File not found on server"}), 404
+#     # prevent concurrent transcription runs
+#     if recording_status.get("recording"):
+#         return jsonify({"error": "Another transcription/recording is already running"}), 400
+#     def worker():
+#         try:
+#             recording_status["recording"] = True
+#             recording_status["live_segments"] = []
+#             transcriber = Transcriber()
+#             if not transcriber.model:
+#                 # model not loaded/available
+#                 recording_status["recording"] = False
+#                 print("Transcription model not available; cannot transcribe file.")
+#                 return
+#             # perform diarization if available
+#             diar_segments = diarization_hook(str(file_path)) or []
+#             # get segments from model
+#             try:
+#                 segments, info = transcriber.model.transcribe(str(file_path), beam_size=5)
+#             except Exception as e:
+#                 print("Error during transcription:", e)
+#                 recording_status["recording"] = False
+#                 return
+#             # Stream the segments into recording_status with timing
+#             start_clock = time.time()
+#             for seg in segments:
+#                 # seg.start is seconds into the audio
+#                 wait_for = seg.start - (time.time() - start_clock)
+#                 if wait_for > 0:
+#                     time.sleep(wait_for)
+#                 # map speaker using diarization segments (best-effort overlap)
+#                 speaker = "Unknown"
+#                 for d_start, d_end, d_label in diar_segments:
+#                     if (seg.start < d_end) and (seg.end > d_start):
+#                         speaker = d_label
+#                         break
+#                 seg_obj = {
+#                     "start": float(seg.start),
+#                     "end": float(seg.end),
+#                     "speaker": str(speaker),
+#                     "text": seg.text.strip()
+#                 }
+#                 # append to shared status for frontend polling
+#                 recording_status.setdefault("live_segments", []).append(seg_obj)
+#                 # also append to transcript file for persistence (optional)
+#                 with open(rec_transcribe_extension.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+#                     line = f"[{filename}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
+#                     tf.write(line)
+#             # done streaming
+#             recording_status["recording"] = False
+#         except Exception as e:
+#             print("Error in transcription worker:", e)
+#             recording_status["recording"] = False
+#     t = threading.Thread(target=worker, daemon=True)
+#     t.start()
+#     return jsonify({"ok": True})
+# --- List final files ---
+@app.route("/api/final-files")
+def api_final_files():
+    files = []
+    out_dir = OUTPUT_DIR
+    for fname in os.listdir(out_dir):
+        if fname.endswith(".wav") or fname.endswith(".txt"):
+            files.append(
+                {"name": fname, "path": f"/static/{fname}", "url": f"/static/{fname}"})
+    return jsonify({"files": files})
+# --- Serve static files (WAV, TXT) ---
+@app.route('/static/<path:filename>')
+def static_files(filename):
+    return send_from_directory(OUTPUT_DIR, filename)
+# --- Serve the frontend ---
+@app.route("/")
+def index():
+    return render_template("index2.html")
+if __name__ == "__main__":
+    # Development entry point: port 5000 with Flask debug mode switched on.
+    # NOTE(review): debug mode is intended for local development only - confirm
+    # this file is never used as the deployed entry point.
+    app.run(port=5000, debug=True)

app2.py ADDED Viewed

	@@ -0,0 +1,291 @@

+# app2.py
+"""
+Flask app to:
+1) serve the provided upload template,
+2) accept .mp3/.wav uploads and show an audio player,
+3) start/stop recording from a system loopback device when the audio element plays/pauses,
+4) stream live transcription back to the browser via Server-Sent Events (SSE).
+Notes:
+- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
+- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
+- This app assumes it runs on the same machine that has access to the local audio devices.
+"""
+import os
+import time
+import threading
+import pathlib
+from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
+from werkzeug.utils import secure_filename
+# import your recorder/transcriber helper (uploaded by you)
+import rec_transcribe_extension as rte
+UPLOAD_FOLDER = "uploads"
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
+app = Flask(__name__, static_folder=None)
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+# Globals for recording thread management
+recording_thread = None
+recording_lock = threading.Lock()
+def allowed_file(filename):
+    ext = pathlib.Path(filename).suffix.lower()
+    return ext in ALLOWED_EXT
+def find_system_loopback_index():
+    """
+    Try to find a likely loopback / system audio input device.
+    Heuristics: look for device names that contain 'loop', 'stereo', 'mix', 'what u hear',
+    'virtual', 'audio cable'. Otherwise fallback to default input device.
+    """
+    pa = None
+    try:
+        import pyaudio
+        pa = pyaudio.PyAudio()
+    except Exception:
+        return None
+    keywords = ["loop", "stereo", "mix", "what u hear",
+                "virtual", "audio cable", "loopback", "monitor"]
+    best_idx = None
+    for i in range(pa.get_device_count()):
+        try:
+            dev = pa.get_device_info_by_index(i)
+            name = (dev.get("name") or "").lower()
+            max_in = dev.get("maxInputChannels", 0)
+            if max_in <= 0:
+                continue
+            for kw in keywords:
+                if kw in name:
+                    best_idx = int(dev["index"])
+                    pa.terminate()
+                    return best_idx
+        except Exception:
+            continue
+    # fallback: default input device
+    try:
+        default_info = pa.get_default_input_device_info()
+        idx = int(default_info.get("index"))
+        pa.terminate()
+        return idx
+    except Exception:
+        if pa:
+            pa.terminate()
+        return None
+@app.route("/", methods=["GET"])
+def index():
+    return render_template("index2_upload.html")
+@app.route("/upload", methods=["POST"])
+def upload():
+    if 'file' not in request.files:
+        return jsonify(success=False, error="No file part"), 400
+    f = request.files['file']
+    if f.filename == '':
+        return jsonify(success=False, error="Empty filename"), 400
+    filename = secure_filename(f.filename)
+    if not allowed_file(filename):
+        return jsonify(success=False, error="Extension not allowed"), 400
+    # avoid collisions by prefixing timestamp
+    ts = int(time.time() * 1000)
+    filename = f"{ts}_{filename}"
+    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    f.save(save_path)
+    url = f"/uploads/{filename}"
+    return jsonify(success=True, url=url, filename=filename)
+@app.route("/uploads/<path:filename>")
+def uploaded_file(filename):
+    return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)
+@app.route("/start", methods=["POST"])
+def start_recording():
+    """
+    Start a background thread which calls rec_transcribe_extension.run_recording(...)
+    We try to detect a loopback device; if not found we pick the default input device.
+    """
+    global recording_thread
+    body = request.get_json(force=True, silent=True) or {}
+    filename = body.get('filename')
+    # Basic check: uploaded file exists (we don't actually play the file on the server,
+    # but it's a sanity check so user didn't start without uploading)
+    if filename:
+        if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
+            return jsonify(success=False, error="Uploaded file not found on server"), 400
+    with recording_lock:
+        # if there's an active recording, return ok
+        if recording_thread and recording_thread.is_alive():
+            return jsonify(success=True, message="Recording already running")
+        # clear any previous stop_event
+        try:
+            if hasattr(rte, 'stop_event'):
+                rte.stop_event = threading.Event()  # new event the run_recording will wait on
+        except Exception:
+            pass
+        # choose device: prefer loopback
+        dev_index = find_system_loopback_index()
+        if dev_index is None:
+            return jsonify(success=False, error="No suitable audio input device found on server"), 500
+        # Start the recording in a background thread
+        def target():
+            try:
+                from rec_transcribe_extension import chunk_writer_and_transcribe_worker
+                import rec_transcribe_extension as rte
+                orig_worker = rte.chunk_writer_and_transcribe_worker
+                def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
+                    while True:
+                        try:
+                            filename, frames = in_queue.get(timeout=1.0)
+                        except queue.Empty:
+                            if rte.stop_event.is_set() and in_queue.empty():
+                                break
+                            continue
+                        rte.save_wav_from_frames(
+                            filename, frames, nchannels=rte.CHANNELS)
+                        final_frames_list.extend(frames)
+                        diar_segments = rte.diarization_hook(str(filename)) or []
+                        if transcriber and transcriber.model:
+                            try:
+                                segments, info = transcriber.model.transcribe(
+                                    str(filename), beam_size=5)
+                                for seg in segments:
+                                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
+                                    speaker = "Unknown"
+                                    for d_start, d_end, d_speaker in diar_segments:
+                                        if (seg_start < d_end) and (seg_end > d_start):
+                                            speaker = d_speaker
+                                            break
+                                    # Write formatted diarization line
+                                    line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
+                                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                                        tf.write(line)
+                            except Exception as e:
+                                print(f"Transcription error for {filename}: {e}")
+                    print("Patched worker exiting.")
+                # apply patch
+                rte.chunk_writer_and_transcribe_worker = patched_worker
+                try:
+                    rte.run_recording(mic_index=dev_index, sys_index=None,
+                                    chunk_secs=getattr(
+                                        rte, 'CHUNK_DURATION_SECS', 3),
+                                    model_name=getattr(rte, 'MODEL_NAME', None),
+                                    no_transcribe=False)
+                finally:
+                    rte.chunk_writer_and_transcribe_worker = orig_worker
+            except Exception as e:
+                print("run_recording exception:", e)
+@app.route("/stop", methods=["POST"])
+def stop_recording():
+    """
+    Signal the rec_transcribe_extension stop_event to stop gracefully.
+    """
+    global recording_thread
+    with recording_lock:
+        # set the stop_event in module
+        if hasattr(rte, 'stop_event') and rte.stop_event is not None:
+            try:
+                rte.stop_event.set()
+            except Exception:
+                pass
+    return jsonify(success=True, message="Stop signal sent")
+def tail_transcript_file(path, stop_cond_fn=None):
+    """
+    Generator that tails the transcript file and yields SSE data lines.
+    If file doesn't exist yet, yield a short status message then keep waiting.
+    stop_cond_fn is a callable that when returns True will break.
+    """
+    last_pos = 0
+    sent_initial = False
+    while True:
+        if stop_cond_fn and stop_cond_fn():
+            break
+        if os.path.exists(path):
+            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
+                fh.seek(last_pos)
+                lines = fh.readlines()
+                if lines:
+                    for ln in lines:
+                        ln = ln.strip()
+                        if ln:
+                            yield f"data: {ln}\n\n"
+                    last_pos = fh.tell()
+                    sent_initial = True
+                else:
+                    # no new lines
+                    time.sleep(0.25)
+        else:
+            if not sent_initial:
+                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
+                sent_initial = True
+            time.sleep(0.5)
+    # final notification
+    yield "data: [info] Transcription ended.\n\n"
+@app.route("/events")
+def events():
+    """
+    SSE endpoint that streams new transcript lines from rec_transcribe_extension.TRANSCRIPT_FILE.
+    The stream ends when the module stop_event is set and the background recording thread finishes.
+    """
+    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
+    if not transcript_path:
+        return Response("No transcript file configured", status=500)
+    transcript_path = str(transcript_path)
+    def stop_fn():
+        # stop when the recording thread is no longer alive AND the module stop_event is set
+        cond = False
+        try:
+            cond = (hasattr(rte, 'stop_event')
+                    and rte.stop_event is not None and rte.stop_event.is_set())
+        except Exception:
+            cond = False
+        # also stop if thread finished
+        t_alive = recording_thread.is_alive() if recording_thread is not None else False
+        # If stop requested and thread not alive -> end stream
+        return (cond and not t_alive)
+    return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)),
+                    mimetype="text/event-stream")
+@app.route("/status")
+def status():
+    running = False
+    if recording_thread and recording_thread.is_alive():
+        running = True
+    return jsonify(running=running)
+if __name__ == "__main__":
+    # Listen on all interfaces (0.0.0.0), port 7860, with threaded request
+    # handling enabled.
+    app.run(host="0.0.0.0", port=7860, threaded=True)

merged.py ADDED Viewed

	@@ -0,0 +1,559 @@

+# merged.py
+import os
+import time
+import threading
+import queue
+import pathlib
+import pyaudio
+from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
+from werkzeug.utils import secure_filename
+# your helper module
+import rec_transcribe_extension as rte
+from rec_transcribe_extension import Transcriber, diarization_hook, run_recording, OUTPUT_DIR
+app = Flask(__name__)
+UPLOAD_FOLDER = "uploads"
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
+def allowed_file(filename: str) -> bool:
+    """Check if file extension is allowed"""
+    ext = pathlib.Path(filename).suffix.lower()
+    return ext in ALLOWED_EXT
+# ---------------- Shared state ----------------
+recording_thread = None
+recording_running = False
+recording_lock = threading.Lock()
+recording_status = {
+    "recording": False,
+    "live_segments": []
+}
+# ---------------- Landing + Frontend ----------------
+@app.route("/")
+def landing():
+    return render_template("landing.html")
+@app.route("/live")
+def live_page():
+    return render_template("index2.html")
+@app.route("/upload")
+def upload_page():
+    return render_template("index2_upload.html")
+# ---------------- Device listing ----------------
+@app.route("/api/devices", methods=["GET"])
+def api_devices():
+    pa = pyaudio.PyAudio()
+    devices = []
+    for i in range(pa.get_device_count()):
+        dev = pa.get_device_info_by_index(i)
+        if dev.get("maxInputChannels", 0) > 0:
+            devices.append({"index": dev["index"], "name": dev["name"]})
+    pa.terminate()
+    return jsonify({"devices": devices})
+# --- Start recording ---
+@app.route("/api/start-recording", methods=["POST"])
+def api_start_recording():
+    global recording_thread, stop_event, recording_status
+    data = request.json
+    # Validate required fields
+    try:
+        mic = int(data.get("mic"))
+    except Exception:
+        return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
+    # sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None
+    sys = None
+    if data.get("sys") not in (None, "", "null"):
+        try:
+            sys = int(data.get("sys"))
+        except Exception:
+            return jsonify({"error": "Invalid 'sys' parameter"}), 400
+    chunk_secs = int(data.get("chunk_secs", 5))
+    model = data.get("model", "medium")
+    no_transcribe = bool(data.get("no_transcribe", False))
+    if recording_status["recording"]:
+        return jsonify({"error": "Already recording"}), 400
+    # --- Validate that requested devices exist and have input channels ---
+    try:
+        pa = pyaudio.PyAudio()
+    except Exception as e:
+        return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
+    def device_is_valid(device_index):
+        try:
+            dev = pa.get_device_info_by_index(device_index)
+            return dev.get("maxInputChannels", 0) > 0
+        except Exception:
+            return False
+    if not device_is_valid(mic):
+        pa.terminate()
+        return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
+    if sys is not None and not device_is_valid(sys):
+        pa.terminate()
+        return jsonify({"error": f"System device index {sys} not found or has no input channels"}), 400
+    pa.terminate()
+    # Reset state
+    recording_status["recording"] = True
+    recording_status["live_segments"] = []
+    stop_event = threading.Event()
+    def run():
+        # Patch: update live_segments after each chunk
+        from rec_transcribe_extension import chunk_writer_and_transcribe_worker
+        # Monkey-patch chunk_writer_and_transcribe_worker to update live_segments
+        import rec_transcribe_extension as rte
+        orig_worker = rte.chunk_writer_and_transcribe_worker
+        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
+            while True:
+                try:
+                    filename, frames = in_queue.get(timeout=1.0)
+                except queue.Empty:
+                    if stop_event.is_set() and in_queue.empty():
+                        break
+                    continue
+                rte.save_wav_from_frames(
+                    filename, frames, nchannels=rte.CHANNELS)
+                final_frames_list.extend(frames)
+                diar = rte.diarization_hook(str(filename))
+                diar_segments = diar if diar else []
+                # Transcribe chunk and get segments with timestamps
+                if transcriber and transcriber.model:
+                    try:
+                        segments, info = transcriber.model.transcribe(
+                            str(filename), beam_size=5)
+                        for seg in segments:
+                            seg_start = seg.start
+                            seg_end = seg.end
+                            seg_text = seg.text.strip()
+                            speaker = "Unknown"
+                            for d_start, d_end, d_speaker in diar_segments:
+                                if (seg_start < d_end) and (seg_end > d_start):
+                                    speaker = d_speaker
+                                    break
+                            # Update live_segments for frontend
+                            recording_status["live_segments"].append({
+                                "start": float(seg_start),
+                                "end": float(seg_end),
+                                "speaker": str(speaker),
+                                "text": seg_text
+                            })
+                            # Write to transcript file as before
+                            line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
+                            with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                                tf.write(line)
+                    except Exception as e:
+                        print(f"Transcription error for {filename.name}: {e}")
+            print("Chunk writer/transcriber worker exiting.")
+        rte.chunk_writer_and_transcribe_worker = patched_worker
+        try:
+            rte.stop_event = stop_event
+            run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
+                          model_name=model, no_transcribe=no_transcribe)
+        finally:
+            rte.chunk_writer_and_transcribe_worker = orig_worker
+            recording_status["recording"] = False
+    recording_thread = threading.Thread(target=run, daemon=True)
+    recording_thread.start()
+    return jsonify({"ok": True})
+# # ---------------- Recording APIs ----------------
+# @app.route("/api/start-recording", methods=["POST"])
+# def api_start_recording():
+#     global recording_thread, recording_status
+#     data = request.json or {}
+#     mic = int(data.get("mic", -1))
+#     sys = data.get("sys")
+#     if sys in (None, "", "null"):
+#         sys = None
+#     else:
+#         sys = int(sys)
+#     chunk_secs = int(data.get("chunk_secs", 5))
+#     model = data.get("model", "medium")
+#     no_transcribe = bool(data.get("no_transcribe", False))
+#     if recording_status["recording"]:
+#         return jsonify({"error": "Already recording"}), 400
+#     # validate devices
+#     pa = pyaudio.PyAudio()
+#     def valid(dev_idx):
+#         try:
+#             dev = pa.get_device_info_by_index(dev_idx)
+#             return dev.get("maxInputChannels", 0) > 0
+#         except Exception:
+#             return False
+#     if not valid(mic):
+#         pa.terminate()
+#         return jsonify({"error": f"Mic device {mic} invalid"}), 400
+#     if sys is not None and not valid(sys):
+#         pa.terminate()
+#         return jsonify({"error": f"System device {sys} invalid"}), 400
+#     pa.terminate()
+#     # reset state
+#     recording_status["recording"] = True
+#     recording_status["live_segments"] = []
+#     rte.stop_event = threading.Event()
+#     def run():
+#         try:
+#             run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
+#                           model_name=model, no_transcribe=no_transcribe)
+#         finally:
+#             recording_status["recording"] = False
+#     recording_thread = threading.Thread(target=run, daemon=True)
+#     recording_thread.start()
+#     return jsonify({"ok": True})
+@app.route("/api/stop-recording", methods=["POST"])
+def api_stop_recording():
+    if hasattr(rte, "stop_event") and rte.stop_event:
+        rte.stop_event.set()
+    return jsonify({"ok": True})
+@app.route("/api/recording-status")
+def api_recording_status():
+    return jsonify({
+        "recording": recording_status.get("recording", False),
+        "live_segments": recording_status.get("live_segments", [])
+    })
+# ---------------- Upload-based APIs ----------------
+@app.route("/api/upload", methods=["POST"])
+def api_upload_file():
+    if 'file' not in request.files:
+        return jsonify(success=False, error="No file part"), 400
+    f = request.files['file']
+    if f.filename == '':
+        return jsonify(success=False, error="Empty filename"), 400
+    filename = secure_filename(f.filename)
+    if not allowed_file(filename):
+        return jsonify(success=False, error="Extension not allowed"), 400
+    # avoid collisions by prefixing timestamp
+    ts = int(time.time() * 1000)
+    filename = f"{ts}_{filename}"
+    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    f.save(save_path)
+    url = f"/uploads/{filename}"
+    return jsonify(success=True, url=url, filename=filename)
+# ---------------- File serving ----------------
+@app.route("/uploads/<path:filename>")
+def uploaded_file(filename):
+    return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)
+# @app.route("/api/start-transcribe-file", methods=["POST"])
+# def api_start_transcribe_file():
+#     data = request.json or {}
+#     filename = data.get("filename")
+#     file_path = OUTPUT_DIR / filename
+#     if not file_path.exists():
+#         return jsonify({"error": "File not found"}), 404
+#     if recording_status.get("recording"):
+#         return jsonify({"error": "Busy"}), 400
+#     def worker():
+#         try:
+#             recording_status["recording"] = True
+#             recording_status["live_segments"] = []
+#             transcriber = Transcriber()
+#             diar_segments = diarization_hook(str(file_path)) or []
+#             segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
+#             start_clock = time.time()
+#             for seg in segments:
+#                 wait_for = seg.start - (time.time() - start_clock)
+#                 if wait_for > 0:
+#                     time.sleep(wait_for)
+#                 speaker = "Unknown"
+#                 for d_start, d_end, d_label in diar_segments:
+#                     if (seg.start < d_end) and (seg.end > d_start):
+#                         speaker = d_label
+#                         break
+#                 seg_obj = {
+#                     "start": float(seg.start),
+#                     "end": float(seg.end),
+#                     "speaker": speaker,
+#                     "text": seg.text.strip()
+#                 }
+#                 recording_status["live_segments"].append(seg_obj)
+#                 # --- NEW: also append to transcript file so /events SSE can stream it ---
+#                 line = f"{seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
+#                 with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+#                     tf.write(line)
+#             recording_status["recording"] = False
+#         except Exception as e:
+#             print("Error in file transcription:", e)
+#             recording_status["recording"] = False
+#     threading.Thread(target=worker, daemon=True).start()
+#     return jsonify({"ok": True})
+def find_system_loopback_index():
+    """
+    Try to find a likely loopback / system audio input device.
+    Heuristics: look for device names that contain 'loop', 'stereo', 'mix', 'what u hear',
+    'virtual', 'audio cable'. Otherwise fallback to default input device.
+    """
+    pa = None
+    try:
+        import pyaudio
+        pa = pyaudio.PyAudio()
+    except Exception:
+        return None
+    keywords = ["loop", "stereo", "mix", "what u hear", "virtual", "audio cable", "loopback", "monitor"]
+    best_idx = None
+    for i in range(pa.get_device_count()):
+        try:
+            dev = pa.get_device_info_by_index(i)
+            name = (dev.get("name") or "").lower()
+            max_in = dev.get("maxInputChannels", 0)
+            if max_in <= 0:
+                continue
+            for kw in keywords:
+                if kw in name:
+                    best_idx = int(dev["index"])
+                    pa.terminate()
+                    return best_idx
+        except Exception:
+            continue
+    try:
+        default_info = pa.get_default_input_device_info()
+        idx = int(default_info.get("index"))
+        pa.terminate()
+        return idx
+    except Exception:
+        if pa:
+            pa.terminate()
+        return None
+@app.route("/api/start-transcribe-file", methods=["POST"])
+def api_start_transcribe_file():
+    """
+    Start a background thread which calls rec_transcribe_extension.run_recording(...)
+    We try to detect a loopback device; if not found we pick the default input device.
+    """
+    global recording_thread
+    body = request.get_json(force=True, silent=True) or {}
+    filename = body.get('filename')
+    # Basic check: uploaded file exists (we don't actually play the file on the server,
+    # but it's a sanity check so user didn't start without uploading)
+    if filename:
+        if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
+            return jsonify(success=False, error="Uploaded file not found on server"), 400
+    with recording_lock:
+        # if there's an active recording, return ok
+        if recording_thread and recording_thread.is_alive():
+            return jsonify(success=True, message="Recording already running")
+        # clear any previous stop_event
+        try:
+            if hasattr(rte, 'stop_event'):
+                rte.stop_event = threading.Event()  # new event the run_recording will wait on
+        except Exception:
+            pass
+        # choose device: prefer loopback
+        dev_index = find_system_loopback_index()
+        if dev_index is None:
+            return jsonify(success=False, error="No suitable audio input device found on server"), 500
+        # Start the recording in a background thread
+        def target():
+            try:
+                from rec_transcribe_extension import chunk_writer_and_transcribe_worker
+                import rec_transcribe_extension as rte
+                orig_worker = rte.chunk_writer_and_transcribe_worker
+                def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
+                    while True:
+                        try:
+                            filename, frames = in_queue.get(timeout=1.0)
+                        except queue.Empty:
+                            if rte.stop_event.is_set() and in_queue.empty():
+                                break
+                            continue
+                        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
+                        final_frames_list.extend(frames)
+                        diar_segments = rte.diarization_hook(str(filename)) or []
+                        if transcriber and transcriber.model:
+                            try:
+                                segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
+                                for seg in segments:
+                                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
+                                    speaker = "Unknown"
+                                    for d_start, d_end, d_speaker in diar_segments:
+                                        if (seg_start < d_end) and (seg_end > d_start):
+                                            speaker = d_speaker
+                                            break
+                                    # Write diarized transcript line
+                                    line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
+                                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                                        tf.write(line)
+                            except Exception as e:
+                                print(f"Transcription error for {filename}: {e}")
+                    print("Patched worker exiting.")
+                # Apply patch
+                rte.chunk_writer_and_transcribe_worker = patched_worker
+                try:
+                    rte.run_recording(
+                        mic_index=dev_index,
+                        sys_index=None,
+                        chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
+                        model_name=getattr(rte, 'MODEL_NAME', None),
+                        no_transcribe=False
+                    )
+                finally:
+                    rte.chunk_writer_and_transcribe_worker = orig_worker
+            except Exception as e:
+                print("run_recording exception:", e)
+        recording_thread = threading.Thread(target=target, daemon=True)
+        recording_thread.start()
+        return jsonify(success=True, message="Recording started", device_index=dev_index)
+# @app.route("/static/<path:filename>")
+# def static_files(filename):
+#     return send_from_directory(OUTPUT_DIR, filename)
+@app.route("/stop", methods=["POST"])
+def stop_recording():
+    """
+    Signal the rec_transcribe_extension stop_event to stop gracefully.
+    """
+    global recording_thread
+    with recording_lock:
+        # set the stop_event in module
+        if hasattr(rte, 'stop_event') and rte.stop_event is not None:
+            try:
+                rte.stop_event.set()
+            except Exception:
+                pass
+    return jsonify(success=True, message="Stop signal sent")
+def tail_transcript_file(path, stop_cond_fn=None):
+    """
+    Generator that tails the transcript file and yields SSE data lines.
+    If file doesn't exist yet, yield a short status message then keep waiting.
+    stop_cond_fn is a callable that when returns True will break.
+    """
+    last_pos = 0
+    sent_initial = False
+    while True:
+        if stop_cond_fn and stop_cond_fn():
+            break
+        if os.path.exists(path):
+            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
+                fh.seek(last_pos)
+                lines = fh.readlines()
+                if lines:
+                    for ln in lines:
+                        ln = ln.strip()
+                        if ln:
+                            yield f"data: {ln}\n\n"
+                    last_pos = fh.tell()
+                    sent_initial = True
+                else:
+                    # no new lines
+                    time.sleep(0.25)
+        else:
+            if not sent_initial:
+                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
+                sent_initial = True
+            time.sleep(0.5)
+    # final notification
+    yield "data: [info] Transcription ended.\n\n"
+# ---------------- SSE events (from app2) ----------------
+@app.route("/events")
+def events():
+    """
+    SSE endpoint that streams new transcript lines from rec_transcribe_extension.TRANSCRIPT_FILE.
+    The stream ends when the module stop_event is set and the background recording thread finishes.
+    """
+    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
+    if not transcript_path:
+        return Response("No transcript file configured", status=500)
+    transcript_path = str(transcript_path)
+    def stop_fn():
+        # stop when the recording thread is no longer alive AND the module stop_event is set
+        cond = False
+        try:
+            cond = (hasattr(rte, 'stop_event')
+                    and rte.stop_event is not None and rte.stop_event.is_set())
+        except Exception:
+            cond = False
+        # also stop if thread finished
+        t_alive = recording_thread.is_alive() if recording_thread is not None else False
+        # If stop requested and thread not alive -> end stream
+        return (cond and not t_alive)
+    return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)),
+                    mimetype="text/event-stream")
+@app.route("/status")
+def status():
+    running = False
+    if recording_thread and recording_thread.is_alive():
+        running = True
+    return jsonify(running=running)
+# ---------------- Run ----------------
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860, debug=True)

rec_transcribe_extension.py ADDED Viewed

	@@ -0,0 +1,345 @@

+import os
+import time
+import wave
+import queue
+import threading
+import datetime
+from pathlib import Path
+import wave
+import pyaudio
+from pyannote.audio import Pipeline
+try:
+    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
+    DIARIZATION_AVAILABLE = True
+except Exception:
+    diarization_pipeline = None
+    DIARIZATION_AVAILABLE = False
+# Optional modules (import safely)
+try:
+    from faster_whisper import WhisperModel
+    FASTER_WHISPER_AVAILABLE = True
+except Exception:
+    FASTER_WHISPER_AVAILABLE = False
+import numpy as np
+# # Optional: voice activity detection
+# try:
+#     import webrtcvad
+#     VAD_AVAILABLE = True
+# except Exception:
+#     VAD_AVAILABLE = False
+# ========== CONFIG ==========
+# Timestamp taken once at import time; it is embedded in every output path below,
+# so all runs within one process share the same file names.
+RUN_TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+# Sample format requested from PyAudio; the WAV helpers read the data as int16.
+FORMAT = pyaudio.paInt16
+CHANNELS = 1  # mono
+# RATE =  16000
+# Sample rate (Hz) used both for capture and for the WAV files written.
+RATE = 44100
+CHUNK = 1024  # frames per buffer read
+CHUNK_DURATION_SECS = 5              # how long each saved chunk is (seconds)
+# Output locations, all relative to the current working directory.
+OUTPUT_DIR = Path("output_transcript_diarization")
+CHUNKS_DIR = OUTPUT_DIR / f"chunks_{RUN_TIMESTAMP}"
+FINAL_WAV = OUTPUT_DIR / f"recorded_audio_{RUN_TIMESTAMP}.wav"
+TRANSCRIPT_FILE = OUTPUT_DIR / f"transcript_{RUN_TIMESTAMP}.txt"
+MODEL_NAME = "medium"  # if using faster-whisper; change as desired
+# ============================
+# Import-time side effects: the output directories are created and a shared
+# PyAudio instance is opened as soon as this module is imported.
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+CHUNKS_DIR.mkdir(parents=True, exist_ok=True)
+audio = pyaudio.PyAudio()
+def list_input_devices():
+    '''
+    Lists all available audio input devices (microphones, loopbacks, etc.) with their
+    indices and channel counts.
+    '''
+    pa = pyaudio.PyAudio()
+    print("Available audio devices (inputs):")
+    for i in range(pa.get_device_count()):
+        dev = pa.get_device_info_by_index(i)
+        if dev.get("maxInputChannels", 0) > 0:
+            print(f"  {i}: {dev['name']} - {dev['maxInputChannels']} chans")
+    pa.terminate()
+def open_stream_for_device(device_index, channels=1):
+    '''
+    Opens a PyAudio input stream for the given device index and channel count.
+    '''
+    stream = audio.open(format=FORMAT,
+                        channels=channels,
+                        rate=RATE,
+                        input=True,
+                        frames_per_buffer=CHUNK,
+                        input_device_index=device_index)
+    return stream
+def save_wav_from_frames(path: Path, frames: list, nchannels=1):
+    '''
+    Saves a list of audio frames as a WAV file at the specified path.
+    '''
+    # Normalize of 44100 Rate
+    raw = b''.join(frames)
+    audio_array = np.frombuffer(raw, dtype=np.int16)
+    # Normalize: scale to 90% of int16 range
+    if np.max(np.abs(audio_array)) > 0:
+        audio_array = (audio_array / np.max(np.abs(audio_array)) * 32767 * 0.9).astype(np.int16)
+    with wave.open(str(path), 'wb') as wf:
+        wf.setnchannels(nchannels)
+        wf.setsampwidth(audio.get_sample_size(FORMAT))
+        wf.setframerate(RATE)
+        # wf.writeframes(b''.join(frames))
+        wf.writeframes(audio_array.tobytes())
+def merge_mono_files_to_stereo(mic_path: Path, sys_path: Path, out_path: Path):
+    """
+    Create simple stereo WAV: mic -> left channel, system -> right channel.
+    Requires numpy. Very naive — works when both inputs have same sample rate and length.
+    """
+    with wave.open(str(mic_path), 'rb') as wm, wave.open(str(sys_path), 'rb') as ws:
+        assert wm.getframerate() == ws.getframerate() == RATE
+        sampwidth = wm.getsampwidth()
+        nframes = min(wm.getnframes(), ws.getnframes())
+        mic_bytes = wm.readframes(nframes)
+        sys_bytes = ws.readframes(nframes)
+    # convert bytes to int16
+    mic_arr = np.frombuffer(mic_bytes, dtype=np.int16)
+    sys_arr = np.frombuffer(sys_bytes, dtype=np.int16)
+    # interleave into stereo
+    stereo = np.empty((nframes * 2,), dtype=np.int16)
+    stereo[0::2] = mic_arr[:nframes]
+    stereo[1::2] = sys_arr[:nframes]
+    with wave.open(str(out_path), 'wb') as wf:
+        wf.setnchannels(2)
+        wf.setsampwidth(sampwidth)
+        wf.setframerate(RATE)
+        wf.writeframes(stereo.tobytes())
+class Transcriber:
+    def __init__(self):
+        '''
+        Loads the faster-whisper model if available.
+        '''
+        self.model = None
+        if FASTER_WHISPER_AVAILABLE:
+            print("Loading faster-whisper model. This may take some time...")
+            # detect device via torch.cuda if available, otherwise CPU
+            try:
+                import torch
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+            except Exception:
+                device = "cpu"
+            # choose a safe compute_type: float16 on GPU, float32 on CPU
+            compute_type = "float16" if device == "cuda" else "float32"
+            try:
+                # instantiate model (may download weights on first run)
+                self.model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
+                print(f"Model loaded on {device} (compute_type={compute_type}).")
+            except Exception as e:
+                print("Failed to load faster-whisper model:", e)
+                print("Continuing without transcription.")
+                self.model = None
+        else:
+            print("faster-whisper not available. Transcription will be disabled.")
+    def transcribe_file(self, wav_path: str):
+        '''
+        Transcribes a WAV file and returns the text.
+        '''
+        if not self.model:
+            return None
+        try:
+            segments, info = self.model.transcribe(wav_path, beam_size=5)
+            text = " ".join([seg.text.strip() for seg in segments])
+            return text
+        except Exception as e:
+            print(f"Transcription error for {wav_path}: {e}")
+            return None
+def diarization_hook(audio_path: str):
+    """
+    Run speaker diarization and return list of (start, end, speaker) tuples.
+    """
+    if not DIARIZATION_AVAILABLE:
+        return None
+    diarization = diarization_pipeline(audio_path)
+    results = []
+    for turn, _, speaker in diarization.itertracks(yield_label=True):
+        results.append((turn.start, turn.end, speaker))
+    return results
+# Recorder threads
+def record_loop(device_index, out_queue, label="mic"):
+    """
+    Continuously read bytes from device stream and push full-second frames to queue.
+    """
+    try:
+        stream = open_stream_for_device(device_index, channels=CHANNELS)
+    except Exception as e:
+        print(f"Could not open stream for device {device_index} ({label}): {e}")
+        return
+    frames_per_chunk = int(RATE / CHUNK * CHUNK_DURATION_SECS)
+    frames = []
+    print(f"Recording from device {device_index} ({label}) ... Press Ctrl+C to stop.")
+    error_count = 0
+    try:
+        while True:
+            try:
+                data = stream.read(CHUNK, exception_on_overflow=False)
+                error_count = 0  # reset on success
+            except Exception as e:
+                print(f"Read error on device {device_index} ({label}): {e}")
+                error_count += 1
+                if error_count > 10:
+                    print(f"Too many errors on device {device_index} ({label}). Stopping this thread.")
+                    break
+                continue
+            frames.append(data)
+            if len(frames) >= frames_per_chunk:
+                ts = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
+                filename = CHUNKS_DIR / f"{label}_{ts}.wav"
+                out_queue.put((filename, frames.copy()))
+                frames = []
+    except KeyboardInterrupt:
+        print(f"Recording thread {label} received KeyboardInterrupt.")
+    finally:
+        try:
+            stream.stop_stream()
+            stream.close()
+        except Exception:
+            pass
+        print(f"Recording thread for {label} exited.")
+def chunk_writer_and_transcribe_worker(in_queue: queue.Queue, final_frames_list: list, transcriber: Transcriber, single_channel_label="mic"):
+    """
+    Save chunk WAV files and optionally send for transcription.
+    Also store frames for final concatenated WAV.
+    """
+    while True:
+        try:
+            filename, frames = in_queue.get(timeout=1.0)
+        except queue.Empty:
+            if stop_event.is_set() and in_queue.empty():
+                break
+            continue
+        save_wav_from_frames(filename, frames, nchannels=CHANNELS)
+        print(f"Saved chunk: {filename.name}")
+        final_frames_list.extend(frames)
+        diar = diarization_hook(str(filename))
+        diar_segments = []
+        if diar:
+            diar_segments = diar  # list of (start, end, speaker)
+        # Transcribe chunk and get segments with timestamps
+        if transcriber and transcriber.model:
+            try:
+                # Get segments with timestamps
+                segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
+                # For each segment, find the speaker
+                for seg in segments:
+                    seg_start = seg.start
+                    seg_end = seg.end
+                    seg_text = seg.text.strip()
+                    # Find speaker whose segment overlaps with this transcription segment
+                    speaker = "Unknown"
+                    for d_start, d_end, d_speaker in diar_segments:
+                        # If diarization segment overlaps with transcription segment
+                        if (seg_start < d_end) and (seg_end > d_start):
+                            speaker = d_speaker
+                            break
+                    line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
+                    print(line.strip())
+                    with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                        tf.write(line)
+            except Exception as e:
+                print(f"Transcription error for {filename.name}: {e}")
+    print("Chunk writer/transcriber worker exiting.")
+def run_recording(mic_index, sys_index=None, chunk_secs=CHUNK_DURATION_SECS, model_name=MODEL_NAME, no_transcribe=False):
+        global CHUNK_DURATION_SECS, MODEL_NAME, stop_event
+        CHUNK_DURATION_SECS = chunk_secs
+        MODEL_NAME = model_name
+        # Start transcriber if enabled
+        transcriber = None if no_transcribe else Transcriber()
+        # Queues and threads
+        q = queue.Queue()
+        final_frames = []
+        if 'stop_event' not in globals() or stop_event is None:
+            stop_event = threading.Event()
+        mic_thread = threading.Thread(target=record_loop, args=(mic_index, q, "mic"), daemon=True)
+        mic_thread.start()
+        q_sys = None
+        writer_thread_sys = None
+        final_frames_sys = []
+        if sys_index is not None:
+            q_sys = queue.Queue()
+            sys_thread = threading.Thread(target=record_loop, args=(sys_index, q_sys, "sys"), daemon=True)
+            sys_thread.start()
+            writer_thread_sys = threading.Thread(target=chunk_writer_and_transcribe_worker, args=(q_sys, final_frames_sys, None, "sys"), daemon=True)
+            writer_thread_sys.start()
+        writer_thread = threading.Thread(target=chunk_writer_and_transcribe_worker, args=(q, final_frames, transcriber, "mic"), daemon=True)
+        writer_thread.start()
+        # try:
+        #     while True:
+        #         time.sleep(0.5)
+        # except KeyboardInterrupt:
+        #     print("\nStopping all threads...")
+        #     stop_event.set()
+        #     time.sleep(1.0)
+        try:
+            # wait until the shared stop_event is set by the caller (Flask / api_stop-recording)
+            while not stop_event.is_set():
+                time.sleep(0.5)
+        except KeyboardInterrupt:
+            print("\nStopping all threads.")
+            stop_event.set()
+            time.sleep(1.0)
+        writer_thread.join(timeout=5)
+        if writer_thread_sys:
+            writer_thread_sys.join(timeout=5)
+        if final_frames:
+            save_wav_from_frames(FINAL_WAV, final_frames, nchannels=CHANNELS)
+            print(f"Saved final WAV: {FINAL_WAV}")
+        if final_frames and final_frames_sys:
+            final_sys_wav = OUTPUT_DIR / "recorded_system_full.wav"
+            save_wav_from_frames(final_sys_wav, final_frames_sys, nchannels=CHANNELS)
+            stereo_path = OUTPUT_DIR / "recorded_audio_stereo.wav"
+            merge_mono_files_to_stereo(FINAL_WAV, final_sys_wav, stereo_path)
+            print(f"Saved merged stereo WAV: {stereo_path}")
+        audio.terminate()
+        print("Done. Transcript (if any) saved to:", TRANSCRIPT_FILE)
+# Main
+if __name__ == "__main__":
+    list_input_devices()
+    mic_index = input("\nEnter the device index for your microphone (or press ENTER to use default): ").strip()
+    if mic_index == "":
+        mic_index = pyaudio.PyAudio().get_default_input_device_info()['index']
+    else:
+        mic_index = int(mic_index)
+    run_recording(mic_index)

requirements.txt ADDED Viewed

	@@ -0,0 +1,171 @@

+aiohappyeyeballs
+aiohttp
+aioice
+aiortc
+aiosignal
+alembic
+antlr4-python3-runtime
+asteroid-filterbanks
+asttokens
+attrs
+audioread
+av
+bidict
+blinker
+certifi
+cffi
+charset-normalizer
+click
+colorama
+coloredlogs
+colorlog
+comm
+contourpy
+cryptography
+ctranslate2
+cycler
+debugpy
+decorator
+dnspython
+docopt
+einops
+eventlet
+executing
+faster-whisper
+filelock
+Flask
+Flask-SocketIO
+flatbuffers
+fonttools
+frozenlist
+fsspec
+google-crc32c
+greenlet
+h11
+huggingface-hub
+humanfriendly
+HyperPyYAML
+idna
+ifaddr
+ipykernel
+ipython
+ipython_pygments_lexers
+itsdangerous
+jedi
+Jinja2
+joblib
+julius
+jupyter_client
+jupyter_core
+kiwisolver
+lazy_loader
+librosa
+lightning
+lightning-utilities
+llvmlite
+Mako
+markdown-it-py
+MarkupSafe
+matplotlib
+matplotlib-inline
+mdurl
+more-itertools
+mpmath
+msgpack
+multidict
+mypy_extensions
+nest-asyncio
+networkx
+numba
+numpy
+omegaconf
+onnxruntime
+openai-whisper
+optuna
+packaging
+pandas
+parso
+pillow
+platformdirs
+pooch
+primePy
+prompt_toolkit
+propcache
+protobuf
+psutil
+pure_eval
+pyannotate
+pyannote.audio
+pyannote.core
+pyannote.database
+pyannote.metrics
+pyannote.pipeline
+PyAudio
+pycparser
+pydub
+pyee
+Pygments
+pylibsrtp
+pyOpenSSL
+pyparsing
+pyreadline3
+python-dateutil
+python-engineio
+python-socketio
+pytorch-lightning
+pytorch-metric-learning
+pytz
+pywin32; sys_platform == "win32"
+PyYAML
+pyzmq
+regex
+requests
+resampy
+Resemblyzer
+rich
+ruamel.yaml
+ruamel.yaml.clib
+safetensors
+scikit-learn
+scipy
+semver
+sentencepiece
+setuptools
+shellingham
+simple-websocket
+six
+sortedcontainers
+SoundCard
+sounddevice
+soundfile
+soxr
+speechbrain
+SQLAlchemy
+stack-data
+sympy
+tabulate
+tensorboardX
+threadpoolctl
+tiktoken
+tokenizers
+torch
+torch-audiomentations
+torch_pitch_shift
+torchaudio
+torchmetrics
+tornado
+tqdm
+traitlets
+transformers
+typer
+typing
+typing_extensions
+tzdata
+urllib3
+wavio
+wcwidth
+webrtcvad
+websocket-client
+Werkzeug
+wsproto
+yarl

static/icon_upload.png ADDED Viewed

templates/index2.html ADDED Viewed

	@@ -0,0 +1,753 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>Audio Transcription Studio</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <!-- <link rel="icon" href="https://lovable.dev/favicon.ico"> -->
+  <!-- <link rel="icon" href="https://cdn-icons-png.flaticon.com/512/727/727245.png?v=2"> -->
+  <link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
+  <style>
+    :root {
+      --bg: #18122b;
+      --bg-card: #231942;
+      --bg-card2: #251e3e;
+      --accent: #a259ec;
+      --accent2: #2563eb;
+      --text: #fff;
+      --text-muted: #bcbcbc;
+      --border: #312e4a;
+      --success: #22c55e;
+      --danger: #dc2626;
+      --cyan: #00fff7;
+    }
+    html,
+    body {
+      height: 100%;
+      margin: 0;
+      padding: 0;
+      font-family: 'Inter', Arial, sans-serif;
+      background: var(--bg);
+      color: var(--text);
+    }
+    .layout {
+      display: flex;
+      min-height: 100vh;
+      gap: 32px;
+      padding: 32px;
+      box-sizing: border-box;
+    }
+    .main-panel {
+      flex: 2;
+      display: flex;
+      flex-direction: column;
+      gap: 24px;
+    }
+    .card {
+      background: var(--bg-card);
+      border-radius: 18px;
+      box-shadow: 0 2px 16px #0003;
+      padding: 32px 32px 24px 32px;
+      margin-bottom: 0;
+      border: 1.5px solid var(--border);
+    }
+    .card h2,
+    .card h3 {
+      margin-top: 0;
+      color: var(--accent);
+      font-size: 1.5em;
+      font-weight: 700;
+      margin-bottom: 18px;
+      letter-spacing: 1px;
+    }
+    .sidebar {
+      flex: 1;
+      min-width: 320px;
+      background: var(--bg-card2);
+      border-radius: 18px;
+      box-shadow: 0 2px 16px #0003;
+      padding: 32px 28px 24px 28px;
+      display: flex;
+      flex-direction: column;
+      gap: 32px;
+      border: 1.5px solid var(--border);
+      height: fit-content;
+    }
+    .sidebar h3 {
+      color: var(--accent2);
+      font-size: 1.2em;
+      font-weight: 700;
+      margin-bottom: 18px;
+      letter-spacing: 1px;
+      display: flex;
+      align-items: center;
+      gap: 8px;
+    }
+    .sidebar label {
+      font-size: 1em;
+      color: var(--text-muted);
+      margin-top: 18px;
+      font-weight: 600;
+      display: flex;
+      align-items: center;
+      gap: 8px;
+    }
+    .sidebar select,
+    .sidebar input[type="number"] {
+      width: 100%;
+      margin-top: 6px;
+      padding: 10px;
+      border-radius: 8px;
+      border: 1px solid var(--border);
+      background: #201c3a;
+      color: var(--text);
+      font-size: 1em;
+      margin-bottom: 10px;
+      outline: none;
+      transition: border 0.2s;
+    }
+    .sidebar select:focus,
+    .sidebar input[type="number"]:focus {
+      border: 1.5px solid var(--accent2);
+    }
+    .sidebar button {
+      width: 100%;
+      padding: 14px 0;
+      margin-top: 18px;
+      border: none;
+      border-radius: 8px;
+      background: var(--accent);
+      color: #fff;
+      font-size: 1.1em;
+      font-weight: 600;
+      cursor: pointer;
+      transition: background 0.2s;
+      box-shadow: 0 2px 8px #0002;
+    }
+    .sidebar button:disabled {
+      background: #a5b4fc;
+      cursor: not-allowed;
+    }
+    .sidebar .stop-btn {
+      background: var(--danger);
+      margin-top: 8px;
+    }
+    .toggle-row {
+      display: flex;
+      align-items: center;
+      gap: 10px;
+      margin-top: 10px;
+    }
+    .toggle-label {
+      flex: 1;
+      color: var(--text-muted);
+      font-size: 1em;
+    }
+    .toggle-switch {
+      width: 38px;
+      height: 22px;
+      background: #333;
+      border-radius: 12px;
+      position: relative;
+      cursor: pointer;
+      transition: background 0.2s;
+    }
+    .toggle-switch input {
+      display: none;
+    }
+    .toggle-slider {
+      position: absolute;
+      top: 2px;
+      left: 2px;
+      width: 18px;
+      height: 18px;
+      background: var(--accent2);
+      border-radius: 50%;
+      transition: left 0.2s;
+    }
+    .toggle-switch input:checked+.toggle-slider {
+      left: 18px;
+      background: var(--danger);
+    }
+    .status {
+      margin: 18px 0 0 0;
+      font-weight: bold;
+      color: var(--success);
+      font-size: 1.1em;
+      text-align: center;
+    }
+    .recorder-center {
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      gap: 18px;
+      margin-bottom: 18px;
+    }
+    .recorder-btn {
+      width: 90px;
+      height: 90px;
+      border-radius: 50%;
+      background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      box-shadow: 0 0 32px #a259ec55;
+      cursor: pointer;
+      transition: box-shadow 0.2s, background 0.2s;
+      position: relative;
+    }
+    .recorder-btn.recording {
+      background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
+      box-shadow: 0 0 32px #dc262655;
+      animation: pulse 1.2s infinite;
+    }
+    @keyframes pulse {
+      0% {
+        box-shadow: 0 0 32px #dc262655;
+      }
+      50% {
+        box-shadow: 0 0 48px #dc2626aa;
+      }
+      100% {
+        box-shadow: 0 0 32px #dc262655;
+      }
+    }
+    .recorder-btn svg {
+      width: 38px;
+      height: 38px;
+      color: #fff;
+    }
+    .recorder-status {
+      color: var(--success);
+      font-size: 1.1em;
+      font-weight: 600;
+      margin-top: 8px;
+    }
+    .recorder-status.recording {
+      color: var(--danger);
+    }
+    .live {
+      margin-top: 0;
+      background: #201c3a;
+      border-radius: 12px;
+      padding: 18px 18px 10px 18px;
+      min-height: 90px;
+      border: 1px solid var(--border);
+      overflow: hidden;
+      /* hide outer overflow, inner #live will scroll */
+      display: flex;
+      flex-direction: column;
+    }
+    /* inner container which actually scrolls */
+    #live {
+      flex: 1 1 auto;
+      overflow-y: auto;
+      padding-right: 6px;
+      /* give room for scroll bar */
+      -webkit-overflow-scrolling: touch;
+      scroll-behavior: smooth;
+      color: var(--text-muted);
+    }
+    .live h4 {
+      margin: 0 0 10px 0;
+      color: var(--cyan);
+      font-size: 1.08em;
+      font-weight: 600;
+      display: flex;
+      align-items: center;
+      gap: 8px;
+    }
+    .chunk {
+      background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
+      margin-bottom: 8px;
+      padding: 10px 12px;
+      border-radius: 8px;
+      font-size: 0.98em;
+      color: var(--text);
+      box-shadow: 0 1px 2px #0002;
+      border: 1px solid rgba(255, 255, 255, 0.02);
+    }
+    /* Small speaker label */
+    .chunk b {
+      color: var(--cyan);
+      margin-right: 6px;
+      font-weight: 700;
+    }
+    /* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
+    #live::-webkit-scrollbar {
+      width: 10px;
+    }
+    #live::-webkit-scrollbar-track {
+      background: rgba(255, 255, 255, 0.02);
+      border-radius: 10px;
+    }
+    #live::-webkit-scrollbar-thumb {
+      background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
+      border-radius: 10px;
+      border: 2px solid rgba(0, 0, 0, 0.15);
+    }
+    #live::-webkit-scrollbar-thumb:hover {
+      filter: brightness(0.95);
+    }
+    /* THEMED SCROLLBAR - Firefox */
+    #live {
+      scrollbar-width: thin;
+      scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
+    }
+    /* responsive: reduce max-height on small screens */
+    @media (max-width: 700px) {
+      .live {
+        max-height: 200px;
+      }
+    }
+    .files h4 {
+      color: var(--accent2);
+      font-size: 1.08em;
+      margin: 0 0 10px 0;
+      font-weight: 600;
+      display: flex;
+      align-items: center;
+      gap: 8px;
+    }
+    .file {
+      background: #2d254a;
+      margin-bottom: 8px;
+      padding: 8px 12px;
+      border-radius: 5px;
+      font-size: 1em;
+      color: #e0e7ef;
+      display: flex;
+      align-items: center;
+      justify-content: space-between;
+      box-shadow: 0 1px 2px #0001;
+    }
+    .file a {
+      color: var(--accent2);
+      text-decoration: none;
+      font-weight: 500;
+    }
+    .file a:hover {
+      text-decoration: underline;
+    }
+    #audio-player-container {
+      margin-bottom: 18px;
+    }
+    #waveform {
+      width: 100%;
+      height: 80px;
+      background: #2d254a;
+      border-radius: 6px;
+    }
+    #transcript-container {
+      background: #2d254a;
+      padding: 14px;
+      border-radius: 6px;
+      margin-top: 24px;
+    }
+    #transcript-content {
+      margin-top: 10px;
+      white-space: pre-wrap;
+      font-size: 1em;
+      color: #e0e7ef;
+      max-height: 300px;
+      overflow: auto;
+      background: #201c3a;
+      padding: 10px;
+      border-radius: 4px;
+    }
+    @media (max-width: 1100px) {
+      .layout {
+        flex-direction: column;
+        gap: 0;
+        padding: 12px;
+      }
+      .sidebar {
+        min-width: unset;
+        width: 100%;
+        margin-bottom: 18px;
+      }
+      .main-panel {
+        padding: 0;
+      }
+    }
+    @media (max-width: 700px) {
+      .card,
+      .sidebar {
+        padding: 16px 8px 12px 8px;
+      }
+      .main-panel {
+        gap: 12px;
+      }
+    }
+  </style>
+</head>
+<body>
+  <div class="layout">
+    <main class="main-panel">
+      <section class="card">
+        <h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio</h2>
+        <div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
+          Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
+        </div>
+        <div class="recorder-center">
+          <div id="recorderBtn" class="recorder-btn" title="Start/Stop Recording">
+            <svg id="micIcon" xmlns="http://www.w3.org/2000/svg" width="38" height="38" viewBox="0 0 24 24" fill="none"
+              stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
+              class="lucide lucide-mic-icon lucide-mic">
+              <path d="M12 19v3" />
+              <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
+              <rect x="9" y="2" width="6" height="13" rx="3" />
+            </svg>
+            <svg id="stopIcon" style="display:none;" xmlns="http://www.w3.org/2000/svg" fill="currentColor"
+              viewBox="0 0 24 24">
+              <rect x="6" y="6" width="12" height="12" rx="2" />
+            </svg>
+          </div>
+          <div id="recorderStatus" class="recorder-status">Ready to record</div>
+        </div>
+      </section>
+      <section class="card">
+        <h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
+        <div class="live">
+          <div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription</div>
+        </div>
+      </section>
+      <!-- <section class="card files">
+        <h4><span style="color:var(--accent2);">📁</span> Recording Files</h4>
+        <div id="audio-player-container"></div>
+        <div id="transcript-container"></div>
+        <div id="files"></div>
+      </section> -->
+    </main>
+    <aside class="sidebar">
+      <h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
+      <label for="mic">Microphone Device</label>
+      <select id="mic" disabled>
+        <option value="1" selected>Default Microphone (#1)</option>
+      </select>
+      <label for="sys">System Audio (Optional)</label>
+      <select id="sys" disabled>
+        <option value="16" selected>System Loopback (#16)</option>
+      </select>
+      <label for="chunk_secs">Chunk Length (seconds)</label>
+      <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
+      <label for="model">Transcription Model</label>
+      <select id="model" disabled>
+        <option value="small">Small (Fast)</option>
+        <option value="medium" selected>Medium (Balanced)</option>
+        <option value="large">Large (Accurate)</option>
+      </select>
+      <div class="toggle-row">
+        <span class="toggle-label">Disable Transcription</span>
+        <label class="toggle-switch">
+          <input type="checkbox" id="no_transcribe">
+          <span class="toggle-slider"></span>
+        </label>
+      </div>
+      <div class="status" id="status"></div>
+    </aside>
+  </div>
+  <script>
+    // --- Recording Button Logic ---
+    // Script-level state and element handles shared by the functions below.
+    // Current recording flag; written by setRecordingUI().
+    let isRecording = false;
+    // Handle of the status-polling interval created by pollStatus(); null until the first poll starts.
+    let polling = null;
+    // The round record/stop button and the two icons swapped inside it.
+    const recorderBtn = document.getElementById('recorderBtn');
+    const micIcon = document.getElementById('micIcon');
+    const stopIcon = document.getElementById('stopIcon');
+    // Text line under the button ("Ready to record" / "Recording...").
+    const recorderStatus = document.getElementById('recorderStatus');
+    // Alias for the same element; not referenced anywhere else in the visible script.
+    const startBtn = recorderBtn; // Use the big round button
+    function setRecordingUI(recording) {
+      isRecording = recording;
+      if (recording) {
+        recorderBtn.classList.add('recording');
+        micIcon.style.display = 'none';
+        stopIcon.style.display = '';
+        recorderStatus.textContent = 'Recording...';
+        recorderStatus.classList.add('recording');
+      } else {
+        recorderBtn.classList.remove('recording');
+        micIcon.style.display = '';
+        stopIcon.style.display = 'none';
+        recorderStatus.textContent = 'Ready to record';
+        recorderStatus.classList.remove('recording');
+      }
+    }
+    recorderBtn.onclick = async function () {
+      if (!isRecording) {
+        await startRecording();
+      } else {
+        await stopRecording();
+      }
+    };
+    async function startRecording() {
+      const mic = 1;
+      const sys = 16;
+      const chunk_secs = 5;
+      const model = "medium";
+      const no_transcribe = document.getElementById('no_transcribe').checked;
+      const statusEl = document.getElementById('status');
+      // Show immediate feedback
+      statusEl.textContent = 'Starting...';
+      // reset color to normal (css var)
+      statusEl.style.color = 'var(--accent2)';
+      try {
+        const resp = await fetch('/api/start-recording', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
+        });
+        // const resp = await fetch('/transcribe_live/start', {
+        //   method: 'POST',
+        //   headers: { 'Content-Type': 'application/json' },
+        //   body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
+        // });
+        if (!resp.ok) {
+          // Attempt to parse JSON { error: "..." } from backend
+          let errMsg = `Failed to start recording (${resp.status})`;
+          try {
+            const json = await resp.json();
+            if (json && json.error) errMsg = json.error;
+          } catch (e) {
+            // ignore parse error, keep fallback message
+          }
+          statusEl.textContent = errMsg;
+          statusEl.style.color = 'var(--danger)'; // show as error
+          setRecordingUI(false);
+          return; // don't start polling
+        }
+        // Success: start UI + polling
+        statusEl.textContent = 'Recording...';
+        statusEl.style.color = 'var(--danger)'; // recording color
+        setRecordingUI(true);
+        pollStatus();
+      } catch (err) {
+        // Network / unexpected error
+        statusEl.textContent = 'Network error: could not start recording';
+        statusEl.style.color = 'var(--danger)';
+        setRecordingUI(false);
+        console.error("startRecording error:", err);
+      }
+    }
+    async function stopRecording() {
+      await fetch('/api/stop-recording', { method: 'POST' });
+      document.getElementById('status').textContent = 'Stopping...';
+      setRecordingUI(false);
+      if (polling) clearInterval(polling);
+      setTimeout(() => { loadFiles(); }, 2000);
+    }
+    // --- Poll status ---
+    function pollStatus() {
+      polling = setInterval(async () => {
+        const res = await fetch('/api/recording-status');
+        const data = await res.json();
+        setRecordingUI(data.recording);
+        // --- Show live transcription ---
+        const liveDiv = document.getElementById('live');
+        liveDiv.innerHTML = '';
+        if (data.live_segments && data.live_segments.length) {
+          data.live_segments.slice(-10).forEach(seg => {
+            const div = document.createElement('div');
+            div.className = 'chunk';
+            div.innerHTML = `<b>${seg.speaker || 'Speaker'}:</b> [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.text}`;
+            liveDiv.appendChild(div);
+          });
+          requestAnimationFrame(() => {
+            liveDiv.scrollTop = liveDiv.scrollHeight;
+          });
+        } else {
+          liveDiv.textContent = 'No Transcription Yet...';
+        }
+        if (!data.recording) {
+          clearInterval(polling);
+          setRecordingUI(false);
+          loadFiles();
+        }
+      }, 1000);
+    }
+    // Helper to format time
+    function formatTime(s) {
+      if (s == null) return "0:00";
+      const mm = Math.floor(s / 60);
+      const ss = Math.floor(s % 60).toString().padStart(2, "0");
+      return `${mm}:${ss}`;
+    }
+    // --- Load final files and display audio player and transcript ---
+    async function loadFiles() {
+      const filesDiv = document.getElementById('files');
+      const audioPlayerDiv = document.getElementById('audio-player-container');
+      const transcriptDiv = document.getElementById('transcript-container');
+      filesDiv.innerHTML = '';
+      audioPlayerDiv.innerHTML = '';
+      transcriptDiv.innerHTML = '';
+      try {
+        const res = await fetch('/api/final-files');
+        const data = await res.json();
+        if (!data.files.length) {
+          filesDiv.textContent = 'No files yet.';
+          return;
+        }
+        // Find the latest recorded_audio_{RUN_TIMESTAMP}.wav and transcript_{RUN_TIMESTAMP}.txt
+        let audioFile = null, transcriptFile = null;
+        data.files.forEach(f => {
+          if (/^recorded_audio_.*\.wav$/.test(f.name)) audioFile = f;
+          if (/^transcript_.*\.txt$/.test(f.name)) transcriptFile = f;
+        });
+        // Display audio player with waveform (using wavesurfer.js if available, else fallback)
+        if (audioFile) {
+          audioPlayerDiv.innerHTML = `
+            <div style="margin-bottom:12px;">
+              <b>${audioFile.name}</b>
+            </div>
+            <div id="waveform" style="width:100%;height:80px;background:#2d254a;border-radius:6px;"></div>
+            <audio id="audio-player" controls style="width:100%;margin-top:8px;">
+              <source src="${audioFile.url || audioFile.path}" type="audio/wav">
+              Your browser does not support the audio element.
+            </audio>
+          `;
+          // Try to use wavesurfer.js for waveform
+          if (window.WaveSurfer) {
+            const wavesurfer = WaveSurfer.create({
+              container: '#waveform',
+              waveColor: '#a259ec',
+              progressColor: '#2563eb',
+              height: 80,
+              barWidth: 2,
+              responsive: true,
+              cursorColor: '#dc2626'
+            });
+            wavesurfer.load(audioFile.url || audioFile.path);
+            // Sync play/pause with audio element
+            const audioElem = document.getElementById('audio-player');
+            audioElem.addEventListener('play', () => wavesurfer.play());
+            audioElem.addEventListener('pause', () => wavesurfer.pause());
+            wavesurfer.on('seek', (progress) => {
+              audioElem.currentTime = progress * audioElem.duration;
+            });
+            audioElem.addEventListener('timeupdate', () => {
+              if (!audioElem.paused) {
+                wavesurfer.seekTo(audioElem.currentTime / audioElem.duration);
+              }
+            });
+          } else {
+            document.getElementById('waveform').innerHTML = '<div style="color:#64748b;text-align:center;padding-top:28px;">(Waveform preview requires wavesurfer.js)</div>';
+          }
+        }
+        // Display transcript file content
+        if (transcriptFile) {
+          transcriptDiv.innerHTML = `
+            <b>${transcriptFile.name}</b>
+            <pre id="transcript-content"></pre>
+          `;
+          // Fetch and display transcript text
+          fetch(transcriptFile.url || transcriptFile.path)
+            .then(r => r.text())
+            .then(txt => {
+              document.getElementById('transcript-content').textContent = txt;
+            });
+        }
+        // List other files (if any)
+        data.files.forEach(f => {
+          if (
+            (audioFile && f.name === audioFile.name) ||
+            (transcriptFile && f.name === transcriptFile.name)
+          ) return;
+          const div = document.createElement('div');
+          div.className = 'file';
+          div.innerHTML = `<span>${f.name}</span> <a href="${f.url || f.path}" target="_blank">Download</a>`;
+          filesDiv.appendChild(div);
+        });
+      } catch (e) {
+        filesDiv.textContent = 'Error loading files.';
+      }
+    }
+    // --- On load ---
+    loadFiles();
+    // Optionally load wavesurfer.js dynamically if not present
+    if (!window.WaveSurfer) {
+      const script = document.createElement('script');
+      script.src = "https://unpkg.com/wavesurfer.js";
+      script.onload = () => { /* will auto-init on next loadFiles() call */ };
+      document.head.appendChild(script);
+    }
+  </script>
+</body>
+</html>

templates/index2_upload.html ADDED Viewed

	@@ -0,0 +1,736 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>Audio Transcription Studio</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <link rel="icon" href=".../icons8-speech recognition-external-smashingstocks-glyph-smashing-stocks-32.png?v=2">
+    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:400,600,700&display=swap">
+    <style>
+        :root {
+            --bg: #18122b;
+            --bg-card: #231942;
+            --bg-card2: #251e3e;
+            --accent: #a259ec;
+            --accent2: #2563eb;
+            --text: #fff;
+            --text-muted: #bcbcbc;
+            --border: #312e4a;
+            --success: #22c55e;
+            --danger: #dc2626;
+            --cyan: #00fff7;
+        }
+        html,
+        body {
+            height: 100%;
+            margin: 0;
+            padding: 0;
+            font-family: 'Inter', Arial, sans-serif;
+            background: var(--bg);
+            color: var(--text);
+        }
+        .layout {
+            display: flex;
+            min-height: 100vh;
+            gap: 32px;
+            padding: 32px;
+            box-sizing: border-box;
+        }
+        .main-panel {
+            flex: 2;
+            display: flex;
+            flex-direction: column;
+            gap: 24px;
+        }
+        .card {
+            background: var(--bg-card);
+            border-radius: 18px;
+            box-shadow: 0 2px 16px #0003;
+            padding: 32px 32px 24px 32px;
+            margin-bottom: 0;
+            border: 1.5px solid var(--border);
+        }
+        .card h2,
+        .card h3 {
+            margin-top: 0;
+            color: var(--accent);
+            font-size: 1.5em;
+            font-weight: 700;
+            margin-bottom: 18px;
+            letter-spacing: 1px;
+        }
+        .sidebar {
+            flex: 1;
+            min-width: 320px;
+            background: var(--bg-card2);
+            border-radius: 18px;
+            box-shadow: 0 2px 16px #0003;
+            padding: 32px 28px 24px 28px;
+            display: flex;
+            flex-direction: column;
+            gap: 32px;
+            border: 1.5px solid var(--border);
+            height: fit-content;
+        }
+        .sidebar h3 {
+            color: var(--accent2);
+            font-size: 1.2em;
+            font-weight: 700;
+            margin-bottom: 18px;
+            letter-spacing: 1px;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+        .sidebar label {
+            font-size: 1em;
+            color: var(--text-muted);
+            margin-top: 18px;
+            font-weight: 600;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+        .sidebar select,
+        .sidebar input[type="number"] {
+            width: 100%;
+            margin-top: 6px;
+            padding: 10px;
+            border-radius: 8px;
+            border: 1px solid var(--border);
+            background: #201c3a;
+            color: var(--text);
+            font-size: 1em;
+            margin-bottom: 10px;
+            outline: none;
+            transition: border 0.2s;
+        }
+        .sidebar select:focus,
+        .sidebar input[type="number"]:focus {
+            border: 1.5px solid var(--accent2);
+        }
+        .sidebar button {
+            width: 100%;
+            padding: 14px 0;
+            margin-top: 18px;
+            border: none;
+            border-radius: 8px;
+            background: var(--accent);
+            color: #fff;
+            font-size: 1.1em;
+            font-weight: 600;
+            cursor: pointer;
+            transition: background 0.2s;
+            box-shadow: 0 2px 8px #0002;
+        }
+        .sidebar button:disabled {
+            background: #a5b4fc;
+            cursor: not-allowed;
+        }
+        .sidebar .stop-btn {
+            background: var(--danger);
+            margin-top: 8px;
+        }
+        .toggle-row {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+            margin-top: 10px;
+        }
+        .toggle-label {
+            flex: 1;
+            color: var(--text-muted);
+            font-size: 1em;
+        }
+        .toggle-switch {
+            width: 38px;
+            height: 22px;
+            background: #333;
+            border-radius: 12px;
+            position: relative;
+            cursor: pointer;
+            transition: background 0.2s;
+        }
+        .toggle-switch input {
+            display: none;
+        }
+        .toggle-slider {
+            position: absolute;
+            top: 2px;
+            left: 2px;
+            width: 18px;
+            height: 18px;
+            background: var(--accent2);
+            border-radius: 50%;
+            transition: left 0.2s;
+        }
+        .toggle-switch input:checked+.toggle-slider {
+            left: 18px;
+            background: var(--danger);
+        }
+        .status {
+            margin: 18px 0 0 0;
+            font-weight: bold;
+            color: var(--success);
+            font-size: 1.1em;
+            text-align: center;
+        }
+        .recorder-center {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            gap: 18px;
+            margin-bottom: 18px;
+        }
+        .recorder-btn {
+            width: 90px;
+            height: 90px;
+            border-radius: 50%;
+            background: linear-gradient(135deg, #a259ec 60%, #2563eb 100%);
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            box-shadow: 0 0 32px #a259ec55;
+            cursor: pointer;
+            transition: box-shadow 0.2s, background 0.2s;
+            position: relative;
+        }
+        .recorder-btn.recording {
+            background: linear-gradient(135deg, #dc2626 60%, #a259ec 100%);
+            box-shadow: 0 0 32px #dc262655;
+            animation: pulse 1.2s infinite;
+        }
+        @keyframes pulse {
+            0% {
+                box-shadow: 0 0 32px #dc262655;
+            }
+            50% {
+                box-shadow: 0 0 48px #dc2626aa;
+            }
+            100% {
+                box-shadow: 0 0 32px #dc262655;
+            }
+        }
+        .recorder-btn svg {
+            width: 38px;
+            height: 38px;
+            color: #fff;
+        }
+        .recorder-status {
+            color: var(--success);
+            font-size: 1.1em;
+            font-weight: 600;
+            margin-top: 8px;
+        }
+        .recorder-status.recording {
+            color: var(--danger);
+        }
+        .live {
+            margin-top: 0;
+            background: #201c3a;
+            border-radius: 12px;
+            padding: 18px 18px 10px 18px;
+            min-height: 90px;
+            border: 1px solid var(--border);
+            overflow: hidden;
+            display: flex;
+            flex-direction: column;
+        }
+        /* inner container which actually scrolls */
+        #live {
+            flex: 1 1 auto;
+            overflow-y: auto;
+            padding-right: 6px;
+            -webkit-overflow-scrolling: touch;
+            scroll-behavior: smooth;
+            color: var(--text-muted);
+        }
+        .live h4 {
+            margin: 0 0 10px 0;
+            color: var(--cyan);
+            font-size: 1.08em;
+            font-weight: 600;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+        .chunk {
+            background: linear-gradient(90deg, rgba(45, 37, 74, 0.2), rgba(38, 32, 63, 0.12));
+            margin-bottom: 8px;
+            padding: 10px 12px;
+            border-radius: 8px;
+            font-size: 0.98em;
+            color: var(--text);
+            box-shadow: 0 1px 2px #0002;
+            border: 1px solid rgba(255, 255, 255, 0.02);
+        }
+        .chunk b {
+            color: var(--cyan);
+            margin-right: 6px;
+            font-weight: 700;
+        }
+        /* THEMED SCROLLBAR - WebKit (Chrome, Edge, Safari) */
+        #live::-webkit-scrollbar {
+            width: 10px;
+        }
+        #live::-webkit-scrollbar-track {
+            background: rgba(255, 255, 255, 0.02);
+            border-radius: 10px;
+        }
+        #live::-webkit-scrollbar-thumb {
+            background: linear-gradient(180deg, var(--accent) 0%, var(--accent2) 100%);
+            border-radius: 10px;
+            border: 2px solid rgba(0, 0, 0, 0.15);
+        }
+        #live::-webkit-scrollbar-thumb:hover {
+            filter: brightness(0.95);
+        }
+        #live {
+            scrollbar-width: thin;
+            scrollbar-color: var(--accent) rgba(255, 255, 255, 0.02);
+        }
+        @media (max-width: 700px) {
+            .live {
+                max-height: 200px;
+            }
+        }
+        .files h4 {
+            color: var(--accent2);
+            font-size: 1.08em;
+            margin: 0 0 10px 0;
+            font-weight: 600;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+        .file {
+            background: #2d254a;
+            margin-bottom: 8px;
+            padding: 8px 12px;
+            border-radius: 5px;
+            font-size: 1em;
+            color: #e0e7ef;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            box-shadow: 0 1px 2px #0001;
+        }
+        .file a {
+            color: var(--accent2);
+            text-decoration: none;
+            font-weight: 500;
+        }
+        .file a:hover {
+            text-decoration: underline;
+        }
+        #audio-player-container {
+            margin-bottom: 18px;
+        }
+        #waveform {
+            width: 100%;
+            height: 80px;
+            background: #2d254a;
+            border-radius: 6px;
+        }
+        #transcript-container {
+            background: #2d254a;
+            padding: 14px;
+            border-radius: 6px;
+            margin-top: 24px;
+        }
+        #transcript-content {
+            margin-top: 10px;
+            white-space: pre-wrap;
+            font-size: 1em;
+            color: #e0e7ef;
+            max-height: 300px;
+            overflow: auto;
+            background: #201c3a;
+            padding: 10px;
+            border-radius: 4px;
+        }
+        @media (max-width: 1100px) {
+            .layout {
+                flex-direction: column;
+                gap: 0;
+                padding: 12px;
+            }
+            .sidebar {
+                min-width: unset;
+                width: 100%;
+                margin-bottom: 18px;
+            }
+            .main-panel {
+                padding: 0;
+            }
+        }
+        @media (max-width: 700px) {
+            .card,
+            .sidebar {
+                padding: 16px 8px 12px 8px;
+            }
+            .main-panel {
+                gap: 12px;
+            }
+        }
+        /* UPLOAD area styles */
+        .upload {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            gap: 10px;
+            padding: 18px 22px;
+            border-radius: 12px;
+            background: rgba(255, 255, 255, 0.02);
+            border: 1px solid rgba(255, 255, 255, 0.03);
+            cursor: default;
+            width: 100%;
+            max-width: 420px;
+            margin: 0 auto;
+        }
+        .upload-btn {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            gap: 10px;
+            cursor: pointer;
+            outline: none;
+            user-select: none;
+        }
+        .upload-btn:focus-visible {
+            box-shadow: 0 0 0 8px rgba(37, 99, 235, 0.08);
+            border-radius: 12px;
+        }
+        .upload-img {
+            width: 120px;
+            height: 96px;
+            object-fit: contain;
+            display: block;
+            user-select: none;
+            pointer-events: none;
+        }
+        .upload-text {
+            color: #bcbcbc;
+            font-weight: 700;
+            font-size: 14px;
+            text-align: center;
+            max-width: 220px;
+            word-break: break-word;
+        }
+        /* preview area inside upload container */
+        .upload-preview {
+            width: 100%;
+            display: flex;
+            flex-direction: column;
+            gap: 10px;
+            align-items: center;
+            margin-top: 6px;
+        }
+        .upload-preview .filename {
+            color: var(--text);
+            font-weight: 600;
+            font-size: 0.95em;
+            text-align: center;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            max-width: 100%;
+        }
+        .upload-preview .controls {
+            display: flex;
+            gap: 8px;
+            align-items: center;
+        }
+        .btn-small {
+            padding: 6px 10px;
+            border-radius: 6px;
+            background: #2d254a;
+            color: var(--text);
+            border: 1px solid rgba(255, 255, 255, 0.03);
+            cursor: pointer;
+            font-weight: 600;
+        }
+    </style>
+</head>
+<body>
+    <div class="layout">
+        <main class="main-panel">
+            <section class="card">
+                <h2 style="text-align:center;font-size:2.2em;color:#a259ec;margin-bottom:0;">Audio Transcription Studio
+                </h2>
+                <div style="text-align:center;color:#bcbcbc;margin-bottom:24px;">
+                    Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.
+                </div>
+                <div class="upload">
+                    <label for="uploadFile" class="upload-btn" tabindex="0" role="button"
+                        aria-label="Upload audio file">
+                        <img class="upload-img" src="/static/icon_upload.png" alt="Upload icon" />
+                        <small class="upload-text">Upload .mp3, .wav file</small>
+                    </label>
+                    <input id="uploadFile" type="file" accept=".mp3,.wav,audio/*" style="display:none" />
+                    <div id="uploadPreview" class="upload-preview" aria-live="polite"></div>
+                </div>
+            </section>
+            <section class="card">
+                <h3><span style="color:var(--cyan);">💬</span> Live Transcription</h3>
+                <div class="live">
+                    <div id="live" style="min-height:32px;color:#bcbcbc;">Start recording to see live transcription
+                    </div>
+                </div>
+            </section>
+        </main>
+        <aside class="sidebar">
+            <h3><span style="color:var(--accent2);">⚙️</span> Recording Settings</h3>
+            <label for="mic">Microphone Device</label>
+            <select id="mic" disabled>
+                <option value="1" selected>Default Microphone (#1)</option>
+            </select>
+            <label for="sys">System Audio (Optional)</label>
+            <select id="sys" disabled>
+                <option value="16" selected>System Loopback (#16)</option>
+            </select>
+            <label for="chunk_secs">Chunk Length (seconds)</label>
+            <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
+            <label for="model">Transcription Model</label>
+            <select id="model" disabled>
+                <option value="small">Small (Fast)</option>
+                <option value="medium" selected>Medium (Balanced)</option>
+                <option value="large">Large (Accurate)</option>
+            </select>
+            <!-- <div class="toggle-row">
+                <span class="toggle-label">Disable Transcription</span>
+                <label class="toggle-switch">
+                    <input type="checkbox" id="no_transcribe">
+                    <span class="toggle-slider"></span>
+                </label>
+            </div> -->
+            <div class="status" id="status"></div>
+        </aside>
+    </div>
+    <script>
+        (function () {
+            const uploadEl = document.getElementById('uploadFile');
+            const preview = document.getElementById('uploadPreview');
+            const live = document.getElementById('live');
+            const statusEl = document.getElementById('status');
+            let audioEl = null;
+            let es = null; // EventSource
+            let playing = false;
+            let currentFile = null;
+            async function uploadFile(file) {
+                const fd = new FormData();
+                fd.append('file', file);
+                const resp = await fetch('/api/upload', { method: 'POST', body: fd });
+                return resp.json();
+            }
+            function createAudioPlayer(url, filename) {
+                preview.innerHTML = '';
+                const container = document.createElement('div');
+                container.style.width = '100%';
+                container.style.display = 'flex';
+                container.style.flexDirection = 'column';
+                container.style.alignItems = 'center';
+                const audio = document.createElement('audio');
+                audio.controls = true;
+                audio.src = url;
+                audio.id = 'uploadedAudio';
+                audio.style.width = '100%';
+                audio.dataset.filename = filename;
+                const info = document.createElement('div');
+                info.className = 'filename';
+                info.textContent = filename;
+                container.appendChild(info);
+                container.appendChild(audio);
+                preview.appendChild(container);
+                // listeners
+                audio.addEventListener('play', onPlay);
+                audio.addEventListener('pause', onPause);
+                audioEl = audio;
+            }
+            async function onPlay() {
+                if (!audioEl || playing) return;
+                playing = true;
+                currentFile = audioEl.dataset.filename;
+                // update UI
+                statusEl.textContent = "▶️ Transcribing...";
+                statusEl.style.color = "var(--success)";
+                try {
+                    await fetch('/api/start-transcribe-file', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ filename: currentFile })
+                    });
+                } catch (e) {
+                    console.error('start failed', e);
+                }
+                // open SSE for transcription
+                if (es) es.close();
+                es = new EventSource('/events');
+                es.onmessage = function (ev) {
+                    const line = ev.data;
+                    if (!line) return;
+                    appendLine(line);
+                };
+                es.onerror = (e) => {
+                    console.warn('SSE error', e);
+                };
+            }
+            async function onPause() {
+                if (!audioEl || !playing) return;
+                playing = false;
+                statusEl.textContent = "⏸️ Stopped";
+                statusEl.style.color = "var(--danger)";
+                try {
+                    await fetch('/stop', { method: 'POST' });
+                } catch (e) {
+                    console.error('stop failed', e);
+                }
+                if (es) {
+                    es.close();
+                    es = null;
+                }
+            }
+            // function appendLine(s) {
+            //     const chunk = document.createElement('div');
+            //     chunk.className = 'chunk';
+            //     chunk.textContent = s;
+            //     live.appendChild(chunk);
+            //     live.scrollTop = live.scrollHeight;
+            // }
+            function appendLine(s) {
+                const chunk = document.createElement('div');
+                chunk.className = 'chunk';
+                // Try to parse format: [file.wav] 0.00-3.00 Speaker A: Hello world
+                const m = s.match(/^\[(.*?)\]\s+([\d.]+)-([\d.]+)\s+Speaker\s+(\S+):\s+(.*)$/);
+                if (m) {
+                    const [, file, start, end, speaker, text] = m;
+                    chunk.innerHTML = `<b>${speaker}</b> [${start}-${end}s]: ${text}`;
+                } else {
+                    chunk.textContent = s;
+                }
+                live.appendChild(chunk);
+                live.scrollTop = live.scrollHeight;
+            }
+            // Poll /status every few seconds (optional, keeps sidebar updated)
+            async function pollStatus() {
+                try {
+                    const r = await fetch('/status');
+                    const js = await r.json();
+                    if (js.running) {
+                        statusEl.textContent = "▶️ Running";
+                        statusEl.style.color = "var(--success)";
+                    } else if (!playing) {
+                        statusEl.textContent = "⏸️ Idle";
+                        statusEl.style.color = "var(--text-muted)";
+                    }
+                } catch (e) { }
+                setTimeout(pollStatus, 3000);
+            }
+            pollStatus();
+            uploadEl.addEventListener('change', async (ev) => {
+                const file = ev.target.files && ev.target.files[0];
+                if (!file) return;
+                const res = await uploadFile(file);
+                if (res && res.success) {
+                    createAudioPlayer(res.url, res.filename);
+                    live.innerHTML = '<div style="color:#bcbcbc;">Ready. Play audio to start live transcription.</div>';
+                } else {
+                    alert('Upload failed: ' + (res && res.error ? res.error : 'unknown'));
+                }
+            });
+        })();
+    </script>
+</body>
+</html>

templates/landing.html ADDED Viewed

	@@ -0,0 +1,160 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>LiveTranscribe — Real-time Audio Transcription</title>
+  <meta name="description" content="LiveTranscribe: real-time microphone + uploaded audio transcription with speaker diarization and downloadable transcripts." />
+  <!-- Tailwind CDN for quick styling -->
+  <script src="https://cdn.tailwindcss.com"></script>
+  <style>
+    /* small extra tweaks */
+    .feature-icon { width:48px; height:48px; }
+  </style>
+</head>
+<body class="bg-gray-50 text-gray-800 font-sans">
+  <header class="bg-white shadow">
+    <div class="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
+      <div class="flex items-center gap-3">
+        <div class="w-10 h-10 rounded-lg bg-gradient-to-tr from-indigo-500 to-purple-500 flex items-center justify-center text-white font-bold">LT</div>
+        <div>
+          <a href="#home" class="text-xl font-semibold">LiveTranscribe</a>
+          <div class="text-sm text-gray-500">Real-time transcription + diarization</div>
+        </div>
+      </div>
+      <nav class="hidden md:flex items-center gap-6 text-sm">
+        <a href="#features" class="hover:underline">Features</a>
+        <a href="#how" class="hover:underline">How it works</a>
+        <a href="#deploy" class="hover:underline">Deploy</a>
+        <a href="#try" class="px-4 py-2 rounded-lg bg-indigo-600 text-white">Get started</a>
+      </nav>
+      <div class="md:hidden">
+        <button id="menuBtn" class="px-3 py-2 bg-gray-100 rounded">Menu</button>
+      </div>
+    </div>
+  </header>
+  <main id="home" class="max-w-6xl mx-auto px-6 py-12">
+    <section class="grid md:grid-cols-2 gap-10 items-center">
+      <div>
+        <h1 class="text-4xl md:text-5xl font-extrabold leading-tight">Real-time transcription for live and uploaded audio</h1>
+        <p class="mt-4 text-gray-600">Record, chunk, diarize speakers and transcribe — all in real time. Useful for meetings, podcasts, lectures, and interviews.</p>
+        <div class="mt-6 flex gap-3">
+          <a href="/live" class="px-5 py-3 rounded-lg bg-indigo-600 text-white shadow">Open Live Recorder</a>
+          <a href="/upload" class="px-5 py-3 rounded-lg border border-gray-200">Upload & Live Transcribe</a>
+        </div>
+        <div class="mt-6 text-sm text-gray-500">
+          <strong>Note:</strong> The links above assume your Flask static templates are served at the project root. Adjust if your routes differ.
+        </div>
+      </div>
+      <!-- <div class="bg-white rounded-lg shadow p-6">
+        <h3 class="font-semibold">Quick demo</h3>
+        <ol class="mt-3 list-decimal list-inside text-gray-600 text-sm space-y-2">
+          <li>Open <code>/index2.html</code> to start the live recorder and see live transcription + diarization.</li>
+          <li>Open <code>/index2_upload.html</code> to upload an audio file and receive a streaming transcript.</li>
+          <li>Download transcripts or copy them from the web UI.</li>
+        </ol>
+      </div> -->
+    </section>
+    <section id="features" class="mt-14">
+      <h2 class="text-2xl font-bold">Features</h2>
+      <div class="grid md:grid-cols-3 gap-6 mt-6">
+        <article class="bg-white p-6 rounded-lg shadow-sm">
+          <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%235a67d8' viewBox='0 0 24 24'><path d='M12 3v10c0 3.31-2.69 6-6 6H4v2h8v-2h-2c2.21 0 4-1.79 4-4V3h-6z'/></svg>" alt="microphone" />
+          <h4 class="mt-3 font-semibold">Live recording</h4>
+          <p class="text-sm text-gray-600 mt-2">Capture mic & system audio in short chunks (5s) and stream them to the transcriber.</p>
+        </article>
+        <article class="bg-white p-6 rounded-lg shadow-sm">
+          <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%2337a6ff' viewBox='0 0 24 24'><path d='M12 2C8 2 5 5 5 9c0 4.42 7 13 7 13s7-8.58 7-13c0-4-3-7-7-7z'/></svg>" alt="upload" />
+          <h4 class="mt-3 font-semibold">Upload + stream</h4>
+          <p class="text-sm text-gray-600 mt-2">Users upload audio files and get live, incremental transcripts back in the browser.</p>
+        </article>
+        <article class="bg-white p-6 rounded-lg shadow-sm">
+          <img class="feature-icon" src="data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' fill='%23f59e0b' viewBox='0 0 24 24'><path d='M12 12c2.21 0 4-1.79 4-4V4h-8v4c0 2.21 1.79 4 4 4z'/></svg>" alt="diarization" />
+          <h4 class="mt-3 font-semibold">Speaker diarization</h4>
+          <p class="text-sm text-gray-600 mt-2">Differentiate speakers in meetings so transcripts label speakers/segments automatically.</p>
+        </article>
+      </div>
+    </section>
+    <section id="how" class="mt-14">
+      <h2 class="text-2xl font-bold">How it works</h2>
+      <div class="mt-4 grid md:grid-cols-3 gap-6">
+        <div class="col-span-2 bg-white p-6 rounded-lg shadow-sm">
+          <h3 class="font-semibold">Architecture (high level)</h3>
+          <ol class="list-decimal list-inside text-gray-600 mt-3 space-y-2 text-sm">
+            <li>Client captures audio (microphone or upload) and sends 5s chunks to the server via WebSocket / fetch.</li>
+            <li>Server stores, optionally concatenates chunks and runs an ASR model (eg. Whisper, faster-whisper) in streaming mode.</li>
+            <li>Speaker diarization runs (pyannote) to assign speaker labels to segments.</li>
+            <li>Transcribed segments + speaker labels are pushed to the client and shown live.</li>
+            <li>User can download the full transcript or copy segments.</li>
+          </ol>
+        </div>
+        <div class="bg-white p-6 rounded-lg shadow-sm">
+          <h3 class="font-semibold">Quick tips</h3>
+          <ul class="text-sm text-gray-600 mt-3 list-disc list-inside space-y-2">
+            <li>Tune chunk-length (5s is a good start) for latency vs accuracy tradeoff.</li>
+            <li>Run diarization asynchronously to avoid blocking transcription if you need lower latency.</li>
+            <li>Provide a "finalize" button so the server can merge chunks and run a final pass for improved accuracy.</li>
+          </ul>
+        </div>
+      </div>
+    </section>
+    <section id="deploy" class="mt-14">
+      <h2 class="text-2xl font-bold">Deploy & run</h2>
+      <div class="mt-4 bg-white p-6 rounded-lg shadow-sm text-sm text-gray-700">
+        <p>Typical steps to run locally or on a VM:</p>
+        <pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto"># 1. create venv
+python -m venv .venv
+source .venv/bin/activate  # or .venv\Scripts\activate on Windows
+# 2. install requirements
+pip install -r requirements.txt
+# 3. run app(s)
+python app.py   # live recorder
+python app2.py  # upload-based transcription
+# 4. open in browser
+http://localhost:5000/index2.html
+http://localhost:5000/index2_upload.html
+</pre>
+        <p class="mt-3">If you want a single Flask app with a landing page route, add this snippet to your Flask app:</p>
+        <pre class="mt-3 bg-gray-100 p-3 rounded text-xs overflow-auto">@app.route('/')
+def landing():
+    return render_template('landing.html')
+</pre>
+      </div>
+    </section>
+    <section id="try" class="mt-14">
+      <h2 class="text-2xl font-bold">Try it now</h2>
+      <div class="mt-4 flex flex-col md:flex-row gap-4">
+        <a href="/live" class="px-4 py-3 rounded-lg bg-indigo-600 text-white">Open Live Recorder</a>
+        <a href="/upload" class="px-4 py-3 rounded-lg border">Upload & Transcribe</a>
+        <a href="#deploy" class="px-4 py-3 rounded-lg border">Deployment instructions</a>
+      </div>
+    </section>
+    <!-- <footer class="mt-16 text-center text-sm text-gray-500 pb-8">
+      Built with ❤️ — Add your logo, links, and analytics here.
+    </footer> -->
+  </main>
+  <script>
+    // tiny menu toggle for mobile
+    const menuBtn = document.getElementById('menuBtn');
+    menuBtn && menuBtn.addEventListener('click', ()=>{
+      alert('Use the links on the page: Features, How it works, Deploy, Try it now');
+    });
+  </script>
+</body>
+</html>

templates/test_index.html ADDED Viewed

	@@ -0,0 +1,292 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Audio Recorder & Transcription UI</title>
+    <style>
+        body {
+            font-family: 'Segoe UI', Arial, sans-serif;
+            background: linear-gradient(120deg, #f5f6fa 60%, #dbeafe 100%);
+            margin: 0;
+            padding: 0;
+        }
+        .container {
+            max-width: 750px;
+            margin: 40px auto;
+            background: #fff;
+            border-radius: 14px;
+            box-shadow: 0 4px 24px #0002;
+            padding: 32px 32px 24px 32px;
+        }
+        h1 {
+            margin-top: 0;
+            font-size: 2.2em;
+            letter-spacing: 1px;
+            color: #2563eb;
+            text-align: center;
+        }
+        label {
+            display: block;
+            margin-top: 18px;
+            font-weight: 600;
+            color: #334155;
+        }
+        select,
+        input[type="number"] {
+            margin-top: 6px;
+            padding: 8px;
+            font-size: 1em;
+            border-radius: 6px;
+            border: 1px solid #cbd5e1;
+            background: #f1f5f9;
+            width: 100%;
+            box-sizing: border-box;
+        }
+        button {
+            margin-top: 12px;
+            margin-right: 10px;
+            padding: 10px 22px;
+            font-size: 1em;
+            font-weight: 600;
+            border: none;
+            border-radius: 6px;
+            background: #2563eb;
+            color: #fff;
+            cursor: pointer;
+            transition: background 0.2s;
+        }
+        button:disabled {
+            background: #94a3b8;
+            cursor: not-allowed;
+        }
+        .stop-btn {
+            background: #dc2626;
+        }
+        .status {
+            margin-top: 18px;
+            font-weight: bold;
+            color: #0ea5e9;
+            text-align: center;
+            font-size: 1.1em;
+        }
+        .live {
+            margin-top: 32px;
+            background: #f1f5f9;
+            border-radius: 8px;
+            padding: 18px 18px 10px 18px;
+        }
+        .live h2 {
+            margin-top: 0;
+            color: #0ea5e9;
+            font-size: 1.2em;
+        }
+        .chunk {
+            background: #e0e7ef;
+            margin-bottom: 8px;
+            padding: 8px 12px;
+            border-radius: 5px;
+            font-size: 1em;
+            color: #334155;
+            box-shadow: 0 1px 2px #0001;
+        }
+        .files {
+            margin-top: 32px;
+            background: #f1f5f9;
+            border-radius: 8px;
+            padding: 18px 18px 10px 18px;
+        }
+        .files h2 {
+            margin-top: 0;
+            color: #2563eb;
+            font-size: 1.2em;
+        }
+        .file {
+            background: #e0e7ef;
+            margin-bottom: 8px;
+            padding: 8px 12px;
+            border-radius: 5px;
+            font-size: 1em;
+            color: #334155;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            box-shadow: 0 1px 2px #0001;
+        }
+        .file a {
+            color: #2563eb;
+            text-decoration: none;
+            font-weight: 500;
+        }
+        .file a:hover {
+            text-decoration: underline;
+        }
+        .footer {
+            margin-top: 36px;
+            text-align: center;
+            color: #64748b;
+            font-size: 0.95em;
+        }
+        @media (max-width: 600px) {
+            .container {
+                padding: 12px 4vw 12px 4vw;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Audio Recorder & Transcription</h1>
+        <div>
+            <label for="mic">Microphone Device</label>
+            <select id="mic" disabled>
+                <option value="1" selected>Microphone Device (#1)</option>
+            </select>
+            <label for="sys">System/Loopback Device (optional)</label>
+            <select id="sys" disabled>
+                <option value="16" selected>System Loopback Device (#16)</option>
+            </select>
+            <label for="chunk_secs">Chunk Length (seconds)</label>
+            <input type="number" id="chunk_secs" value="5" min="1" max="60" readonly>
+            <label for="model">Transcription Model</label>
+            <select id="model" disabled>
+                <option value="small">small</option>
+                <option value="medium" selected>medium</option>
+                <option value="large">large</option>
+            </select>
+            <div style="margin-top:18px; text-align:center;">
+                <button id="startBtn">Start Recording</button>
+                <button id="stopBtn" class="stop-btn" disabled>Stop Recording</button>
+            </div>
+        </div>
+        <div class="status" id="status"></div>
+        <div class="live">
+            <h2>Live Transcription</h2>
+            <div id="live"></div>
+        </div>
+        <div class="files">
+            <h2>Final Files</h2>
+            <div id="files"></div>
+        </div>
+        <div class="footer">
+            &copy; 2025 Audio Multi-Transcript UI &middot; Powered by Flask + PyAudio + Whisper
+        </div>
+    </div>
+    <script>
+        // --- Start/Stop Recording ---
+        let polling = null;
+        document.getElementById('startBtn').onclick = async function () {
+            const mic = 1; // static value
+            const sys = 16; // static value
+            const chunk_secs = 5; // static value
+            const model = "medium"; // static value
+            const no_transcribe = false;
+            document.getElementById('status').textContent = 'Starting...';
+            await fetch('/api/start-recording', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
+            });
+            document.getElementById('startBtn').disabled = true;
+            document.getElementById('stopBtn').disabled = false;
+            pollStatus();
+        };
+        document.getElementById('stopBtn').onclick = async function () {
+            await fetch('/api/stop-recording', { method: 'POST' });
+            document.getElementById('status').textContent = 'Stopping...';
+            document.getElementById('stopBtn').disabled = true;
+            if (polling) clearInterval(polling);
+            setTimeout(() => { loadFiles(); document.getElementById('startBtn').disabled = false; }, 2000);
+        };
+        // --- Poll status ---
+        function pollStatus() {
+            polling = setInterval(async () => {
+                const res = await fetch('/api/recording-status');
+                const data = await res.json();
+                document.getElementById('status').textContent = data.recording ? 'Recording...' : 'Idle';
+                // --- Show live transcription ---
+                const liveDiv = document.getElementById('live');
+                liveDiv.innerHTML = '';
+                if (data.live_segments && data.live_segments.length) {
+                    data.live_segments.slice(-10).forEach(seg => {
+                        const div = document.createElement('div');
+                        div.className = 'chunk';
+                        div.innerHTML = `<b>${seg.speaker || 'Speaker'}:</b> [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.text}`;
+                        liveDiv.appendChild(div);
+                    });
+                } else {
+                    liveDiv.textContent = 'No transcription yet.';
+                }
+                if (!data.recording) {
+                    clearInterval(polling);
+                    document.getElementById('startBtn').disabled = false;
+                    document.getElementById('stopBtn').disabled = true;
+                    loadFiles();
+                }
+            }, 1000);
+        }
+        // Helper to format time
+        function formatTime(s) {
+            if (s == null) return "0:00";
+            const mm = Math.floor(s / 60);
+            const ss = Math.floor(s % 60).toString().padStart(2, "0");
+            return `${mm}:${ss}`;
+        }
+        // --- Load final files ---
+        async function loadFiles() {
+            const filesDiv = document.getElementById('files');
+            filesDiv.innerHTML = '';
+            try {
+                const res = await fetch('/api/final-files');
+                const data = await res.json();
+                if (!data.files.length) {
+                    filesDiv.textContent = 'No files yet.';
+                    return;
+                }
+                data.files.forEach(f => {
+                    const div = document.createElement('div');
+                    div.className = 'file';
+                    div.innerHTML = `<span>${f.name}</span> <a href="${f.url || f.path}" target="_blank">Download</a>`;
+                    filesDiv.appendChild(div);
+                });
+            } catch (e) {
+                filesDiv.textContent = 'Error loading files.';
+            }
+        }
+        // --- On load ---
+        loadFiles();
+    </script>
+</body>
+</html>

templates/test_index3.html ADDED Viewed

	@@ -0,0 +1,300 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Audio Transcription Studio</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
+    <style>
+        body {
+            font-family: 'Inter', sans-serif;
+            background-color: #1a1a2e; /* Dark purple background */
+            color: #ffffff;
+        }
+        .container-bg {
+            background-color: #2c2c44; /* Slightly lighter purple for containers */
+        }
+        .panel-bg {
+            background-color: #22223b; /* Darker panel background */
+        }
+        .input-field {
+            background-color: #3b3b55;
+            border: 1px solid #4a4a6b;
+            color: #e0e0e0;
+        }
+        .button-glow {
+            box-shadow: 0 0 10px 2px #6a1b9a;
+        }
+        .glow-text {
+            text-shadow: 0 0 8px #d1c4e9;
+        }
+    </style>
+</head>
+<body class="flex items-center justify-center min-h-screen p-8">
+    <div class="w-full max-w-6xl">
+        <!-- Main Header -->
+        <header class="text-center mb-10">
+            <h1 class="text-5xl font-extrabold text-[#d1c4e9] glow-text mb-2">Audio Transcription Studio</h1>
+            <p class="text-lg text-gray-400">Record high-quality audio and get real-time AI-powered transcriptions with speaker detection.</p>
+        </header>
+        <!-- Main Content Grid -->
+        <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
+            <!-- Left Panel: Live Transcription -->
+            <div class="lg:col-span-2 panel-bg p-8 rounded-2xl shadow-xl">
+                <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-file-alt mr-2"></i> Live Transcription</h2>
+                <!-- Recording Status & Button -->
+                <div id="recording-status-area" class="flex flex-col items-center justify-center p-6 mb-8">
+                    <div id="status-spinner" class="relative w-32 h-32 hidden">
+                        <div class="absolute inset-0 border-4 border-purple-500 rounded-full animate-ping"></div>
+                        <div class="absolute inset-4 border-4 border-purple-400 rounded-full animate-ping delay-200"></div>
+                        <div class="absolute inset-8 border-4 border-purple-300 rounded-full animate-ping delay-400"></div>
+                        <div class="flex items-center justify-center h-full w-full">
+                             <i class="fas fa-microphone text-4xl text-white"></i>
+                        </div>
+                    </div>
+                    <div id="status-icon" class="relative w-32 h-32 flex items-center justify-center bg-purple-600 rounded-full">
+                        <i class="fas fa-microphone text-4xl text-white"></i>
+                    </div>
+                    <p id="status-text" class="mt-4 text-green-400 font-semibold text-lg">Ready to record</p>
+                    <div id="start-stop-buttons" class="mt-4">
+                        <button id="start-btn" class="bg-purple-600 hover:bg-purple-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 button-glow disabled:opacity-50 disabled:cursor-not-allowed">
+                            Start Recording
+                        </button>
+                        <button id="stop-btn" class="bg-red-600 hover:bg-red-700 text-white font-bold py-2 px-6 rounded-full transition duration-300 disabled:opacity-50 disabled:cursor-not-allowed hidden">
+                            Stop Recording
+                        </button>
+                    </div>
+                </div>
+                <!-- Live Transcription Display -->
+                <div id="live-transcription" class="bg-[#1b1b2a] p-6 rounded-lg h-96 overflow-y-auto border border-[#3b3b55]">
+                    <p class="text-gray-400 text-center text-lg mt-12">Start recording to see live transcription</p>
+                </div>
+            </div>
+            <!-- Right Panel: Recording Settings & Files -->
+            <div class="lg:col-span-1 space-y-8">
+                <!-- Recording Settings Panel -->
+                <div class="panel-bg p-8 rounded-2xl shadow-xl">
+                    <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-cogs mr-2"></i> Recording Settings</h2>
+                    <div class="space-y-6">
+                        <!-- Microphone Device -->
+                        <div>
+                            <label for="mic-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-microphone mr-2"></i>Microphone Device</label>
+                            <select id="mic-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
+                                <option value="">Loading devices...</option>
+                            </select>
+                        </div>
+                        <!-- System Audio -->
+                        <div>
+                            <label for="sys-select" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-desktop mr-2"></i>System Audio (Optional)</label>
+                            <select id="sys-select" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
+                                <option value="null">None</option>
+                            </select>
+                        </div>
+                        <!-- Chunk Length -->
+                        <div>
+                            <label for="chunk-secs-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-clock mr-2"></i>Chunk Length (seconds)</label>
+                            <input type="number" id="chunk-secs-input" value="5" min="1" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
+                        </div>
+                        <!-- Transcription Model -->
+                        <div>
+                            <label for="model-input" class="block text-sm font-medium text-gray-400 mb-2"><i class="fas fa-brain mr-2"></i>Transcription Model</label>
+                            <select id="model-input" class="block w-full rounded-md shadow-sm p-3 input-field focus:ring-purple-500 focus:border-purple-500">
+                                <option value="medium">Medium (Balanced)</option>
+                                <option value="small">Small</option>
+                                <option value="large">Large</option>
+                            </select>
+                        </div>
+                        <!-- Disable Transcription Toggle -->
+                        <div class="flex items-center">
+                            <input id="no-transcribe-checkbox" type="checkbox" class="h-5 w-5 text-purple-600 focus:ring-purple-500 rounded border-gray-600 bg-gray-700">
+                            <label for="no-transcribe-checkbox" class="ml-2 block text-sm text-gray-300">Disable Transcription</label>
+                        </div>
+                    </div>
+                </div>
+                <!-- Recording Files Panel -->
+                <div class="panel-bg p-8 rounded-2xl shadow-xl">
+                    <h2 class="text-2xl font-bold mb-4 text-[#d1c4e9]"><i class="fas fa-folder-open mr-2"></i> Recording Files</h2>
+                    <div id="final-files-list" class="space-y-2 text-gray-300">
+                        <p class="text-gray-500">No files yet...</p>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+    <script>
+        // Cached references to the settings controls (looked up once by id).
+        const micSelect = document.getElementById('mic-select');
+        const sysSelect = document.getElementById('sys-select');
+        const chunkSecsInput = document.getElementById('chunk-secs-input');
+        const modelInput = document.getElementById('model-input');
+        const noTranscribeCheckbox = document.getElementById('no-transcribe-checkbox');
+        // Recording controls and status/output elements.
+        const startBtn = document.getElementById('start-btn');
+        const stopBtn = document.getElementById('stop-btn');
+        const statusText = document.getElementById('status-text');
+        const liveTranscription = document.getElementById('live-transcription');
+        const finalFilesList = document.getElementById('final-files-list');
+        const statusIcon = document.getElementById('status-icon');
+        const statusSpinner = document.getElementById('status-spinner');
+        // Timer id returned by setInterval in startStatusPolling; undefined until polling starts.
+        let statusPollingInterval;
+        // Fetch available audio devices and populate the dropdowns
+        async function fetchDevices() {
+            try {
+                const response = await fetch('/api/devices');
+                const data = await response.json();
+                const micOptions = data.devices.map(device => `<option value="${device.index}">${device.name}</option>`).join('');
+                micSelect.innerHTML = micOptions;
+                const sysOptions = `<option value="null">None</option>` + micOptions;
+                sysSelect.innerHTML = sysOptions;
+                if (data.devices.length > 0) {
+                    micSelect.value = data.devices[0].index;
+                }
+            } catch (error) {
+                console.error('Error fetching devices:', error);
+                micSelect.innerHTML = `<option>Error loading devices</option>`;
+                sysSelect.innerHTML = `<option>Error loading devices</option>`;
+            }
+        }
+        // Fetch final files and display them
+        async function fetchFinalFiles() {
+            try {
+                const response = await fetch('/api/final-files');
+                const data = await response.json();
+                if (data.files.length > 0) {
+                    const filesHtml = data.files.map(file => `
+                        <a href="${file.url}" class="flex items-center text-purple-400 hover:text-purple-300 transition-colors duration-200" target="_blank">
+                            <i class="fas fa-file-waveform mr-2"></i><span>${file.name}</span>
+                        </a>
+                    `).join('');
+                    finalFilesList.innerHTML = filesHtml;
+                } else {
+                    finalFilesList.innerHTML = `<p class="text-gray-500">No files yet...</p>`;
+                }
+            } catch (error) {
+                console.error('Error fetching final files:', error);
+                finalFilesList.innerHTML = `<p class="text-red-500">Error loading files.</p>`;
+            }
+        }
+        // Poll the server for recording status and live segments
+        function startStatusPolling() {
+            statusPollingInterval = setInterval(async () => {
+                try {
+                    const response = await fetch('/api/recording-status');
+                    const data = await response.json();
+                    if (data.recording) {
+                        statusText.textContent = 'Recording...';
+                        statusText.classList.remove('text-green-400');
+                        statusText.classList.add('text-purple-400');
+                        statusIcon.classList.add('hidden');
+                        statusSpinner.classList.remove('hidden');
+                        liveTranscription.innerHTML = '';
+                        if (data.live_segments.length === 0) {
+                            liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Recording started. Waiting for transcription...</p>`;
+                        } else {
+                            data.live_segments.forEach(segment => {
+                                const p = document.createElement('p');
+                                p.className = 'text-gray-200 mb-1 leading-snug';
+                                p.innerHTML = `<span class="font-semibold text-purple-300">${segment.speaker}:</span> ${segment.text}`;
+                                liveTranscription.appendChild(p);
+                            });
+                            liveTranscription.scrollTop = liveTranscription.scrollHeight;
+                        }
+                        fetchFinalFiles();
+                    } else {
+                        statusText.textContent = 'Ready to record';
+                        statusText.classList.remove('text-purple-400');
+                        statusText.classList.add('text-green-400');
+                        statusIcon.classList.remove('hidden');
+                        statusSpinner.classList.add('hidden');
+                        clearInterval(statusPollingInterval);
+                        startBtn.classList.remove('hidden');
+                        stopBtn.classList.add('hidden');
+                        fetchFinalFiles();
+                    }
+                } catch (error) {
+                    console.error('Error polling status:', error);
+                    clearInterval(statusPollingInterval);
+                }
+            }, 1000);
+        }
+        // Start recording
+        startBtn.addEventListener('click', async () => {
+            const mic = micSelect.value;
+            const sys = sysSelect.value === 'null' ? null : sysSelect.value;
+            const chunk_secs = chunkSecsInput.value;
+            const model = modelInput.value;
+            const no_transcribe = noTranscribeCheckbox.checked;
+            try {
+                const response = await fetch('/api/start-recording', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ mic, sys, chunk_secs, model, no_transcribe })
+                });
+                if (response.ok) {
+                    startBtn.classList.add('hidden');
+                    stopBtn.classList.remove('hidden');
+                    liveTranscription.innerHTML = `<p class="text-gray-400 text-center text-lg mt-12">Starting recording...</p>`;
+                    startStatusPolling();
+                } else {
+                    const error = await response.json();
+                    alert(`Error: ${error.error}`);
+                }
+            } catch (error) {
+                console.error('Failed to start recording:', error);
+                alert('Failed to start recording. Check server connection.');
+            }
+        });
+        // Stop recording
+        stopBtn.addEventListener('click', async () => {
+            try {
+                const response = await fetch('/api/stop-recording', {
+                    method: 'POST'
+                });
+                if (response.ok) {
+                    // Status polling will handle UI updates after the server stops
+                }
+            } catch (error) {
+                console.error('Failed to stop recording:', error);
+            }
+        });
+        // Initial setup on page load
+        window.onload = () => {
+            fetchDevices();
+            fetchFinalFiles();
+        };
+    </script>
+</body>
+</html>