Spaces:

prthm11
/

AudioTransDiar

Sleeping

App Files Files Community

prthm11 committed on Sep 2, 2025

Commit

0ba7c0e

verified ·

1 Parent(s): 2a26bdf

Update merged.py

Browse files

Files changed (1) hide show

merged.py +273 -395

merged.py CHANGED Viewed

@@ -1,82 +1,125 @@
-# main.py
 import os
 import time
 import threading
 import queue
 import pathlib
-import pyaudio
 from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
 from werkzeug.utils import secure_filename
-# your helper module
-import rec_transcribe_extension as rte
-from rec_transcribe_extension import Transcriber, diarization_hook, run_recording, OUTPUT_DIR
-app = Flask(__name__)
-UPLOAD_FOLDER = "/app/uploads"
-# os.makedirs(UPLOAD_FOLDER, exist_ok=True)
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
 def allowed_file(filename: str) -> bool:
-    """Check if file extension is allowed"""
     ext = pathlib.Path(filename).suffix.lower()
     return ext in ALLOWED_EXT
-# ---------------- Shared state ----------------
 recording_thread = None
-recording_running = False
 recording_lock = threading.Lock()
-recording_status = {
-    "recording": False,
-    "live_segments": []
-}
-# ---------------- Landing + Frontend ----------------
 @app.route("/")
 def landing():
     return render_template("landing.html")
 @app.route("/live")
 def live_page():
     return render_template("index2.html")
 @app.route("/upload")
 def upload_page():
     return render_template("index2_upload.html")
-# ---------------- Device listing ----------------
 @app.route("/api/devices", methods=["GET"])
 def api_devices():
-    pa = pyaudio.PyAudio()
-    devices = []
-    for i in range(pa.get_device_count()):
-        dev = pa.get_device_info_by_index(i)
-        if dev.get("maxInputChannels", 0) > 0:
-            devices.append({"index": dev["index"], "name": dev["name"]})
-    pa.terminate()
-    return jsonify({"devices": devices})
-# --- Start recording ---
 @app.route("/api/start-recording", methods=["POST"])
 def api_start_recording():
-    global recording_thread, stop_event, recording_status
-    data = request.json
-    # Validate required fields
     try:
         mic = int(data.get("mic"))
     except Exception:
         return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
-    # sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None
     sys = None
     if data.get("sys") not in (None, "", "null"):
         try:
@@ -87,12 +130,13 @@ def api_start_recording():
     chunk_secs = int(data.get("chunk_secs", 5))
     model = data.get("model", "medium")
     no_transcribe = bool(data.get("no_transcribe", False))
     if recording_status["recording"]:
         return jsonify({"error": "Already recording"}), 400
-    # --- Validate that requested devices exist and have input channels ---
     try:
-        pa = pyaudio.PyAudio()
     except Exception as e:
         return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
@@ -113,137 +157,104 @@ def api_start_recording():
     pa.terminate()
-    # Reset state
     recording_status["recording"] = True
     recording_status["live_segments"] = []
     stop_event = threading.Event()
     def run():
-        # Patch: update live_segments after each chunk
-        from rec_transcribe_extension import chunk_writer_and_transcribe_worker
-        # Monkey-patch chunk_writer_and_transcribe_worker to update live_segments
-        import rec_transcribe_extension as rte
-        orig_worker = rte.chunk_writer_and_transcribe_worker
-        def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
-            while True:
-                try:
-                    filename, frames = in_queue.get(timeout=1.0)
-                except queue.Empty:
-                    if stop_event.is_set() and in_queue.empty():
-                        break
-                    continue
-                rte.save_wav_from_frames(
-                    filename, frames, nchannels=rte.CHANNELS)
-                final_frames_list.extend(frames)
-                diar = rte.diarization_hook(str(filename))
-                diar_segments = diar if diar else []
-                # Transcribe chunk and get segments with timestamps
-                if transcriber and transcriber.model:
                     try:
-                        segments, info = transcriber.model.transcribe(
-                            str(filename), beam_size=5)
-                        for seg in segments:
-                            seg_start = seg.start
-                            seg_end = seg.end
-                            seg_text = seg.text.strip()
-                            speaker = "Unknown"
-                            for d_start, d_end, d_speaker in diar_segments:
-                                if (seg_start < d_end) and (seg_end > d_start):
-                                    speaker = d_speaker
-                                    break
-                            # Update live_segments for frontend
-                            recording_status["live_segments"].append({
-                                "start": float(seg_start),
-                                "end": float(seg_end),
-                                "speaker": str(speaker),
-                                "text": seg_text
-                            })
-                            # Write to transcript file as before
-                            line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
-                            with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
-                                tf.write(line)
-                    except Exception as e:
-                        print(f"Transcription error for {filename.name}: {e}")
-            print("Chunk writer/transcriber worker exiting.")
-        rte.chunk_writer_and_transcribe_worker = patched_worker
-        try:
-            rte.stop_event = stop_event
-            run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
-                          model_name=model, no_transcribe=no_transcribe)
-        finally:
-            rte.chunk_writer_and_transcribe_worker = orig_worker
-            recording_status["recording"] = False
-    recording_thread = threading.Thread(target=run, daemon=True)
-    recording_thread.start()
-    return jsonify({"ok": True})
-# # ---------------- Recording APIs ----------------
-# @app.route("/api/start-recording", methods=["POST"])
-# def api_start_recording():
-#     global recording_thread, recording_status
-#     data = request.json or {}
-#     mic = int(data.get("mic", -1))
-#     sys = data.get("sys")
-#     if sys in (None, "", "null"):
-#         sys = None
-#     else:
-#         sys = int(sys)
-#     chunk_secs = int(data.get("chunk_secs", 5))
-#     model = data.get("model", "medium")
-#     no_transcribe = bool(data.get("no_transcribe", False))
-#     if recording_status["recording"]:
-#         return jsonify({"error": "Already recording"}), 400
-#     # validate devices
-#     pa = pyaudio.PyAudio()
-#     def valid(dev_idx):
-#         try:
-#             dev = pa.get_device_info_by_index(dev_idx)
-#             return dev.get("maxInputChannels", 0) > 0
-#         except Exception:
-#             return False
-#     if not valid(mic):
-#         pa.terminate()
-#         return jsonify({"error": f"Mic device {mic} invalid"}), 400
-#     if sys is not None and not valid(sys):
-#         pa.terminate()
-#         return jsonify({"error": f"System device {sys} invalid"}), 400
-#     pa.terminate()
-#     # reset state
-#     recording_status["recording"] = True
-#     recording_status["live_segments"] = []
-#     rte.stop_event = threading.Event()
-#     def run():
-#         try:
-#             run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
-#                           model_name=model, no_transcribe=no_transcribe)
-#         finally:
-#             recording_status["recording"] = False
-#     recording_thread = threading.Thread(target=run, daemon=True)
-#     recording_thread.start()
-#     return jsonify({"ok": True})
 @app.route("/api/stop-recording", methods=["POST"])
 def api_stop_recording():
-    if hasattr(rte, "stop_event") and rte.stop_event:
-        rte.stop_event.set()
     return jsonify({"ok": True})
 @app.route("/api/recording-status")
 def api_recording_status():
     return jsonify({
@@ -251,9 +262,7 @@ def api_recording_status():
         "live_segments": recording_status.get("live_segments", [])
     })
-# ---------------- Upload-based APIs ----------------
 @app.route("/api/upload", methods=["POST"])
 def api_upload_file():
     if 'file' not in request.files:
@@ -264,231 +273,86 @@ def api_upload_file():
     filename = secure_filename(f.filename)
     if not allowed_file(filename):
         return jsonify(success=False, error="Extension not allowed"), 400
-    # avoid collisions by prefixing timestamp
     ts = int(time.time() * 1000)
-    filename = f"{ts}_{filename}"
-    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-    f.save(save_path)
-    url = f"/uploads/{filename}"
-    return jsonify(success=True, url=url, filename=filename)
-# ---------------- File serving ----------------
 @app.route("/uploads/<path:filename>")
 def uploaded_file(filename):
     return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)
-# @app.route("/api/start-transcribe-file", methods=["POST"])
-# def api_start_transcribe_file():
-#     data = request.json or {}
-#     filename = data.get("filename")
-#     file_path = OUTPUT_DIR / filename
-#     if not file_path.exists():
-#         return jsonify({"error": "File not found"}), 404
-#     if recording_status.get("recording"):
-#         return jsonify({"error": "Busy"}), 400
-#     def worker():
-#         try:
-#             recording_status["recording"] = True
-#             recording_status["live_segments"] = []
-#             transcriber = Transcriber()
-#             diar_segments = diarization_hook(str(file_path)) or []
-#             segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
-#             start_clock = time.time()
-#             for seg in segments:
-#                 wait_for = seg.start - (time.time() - start_clock)
-#                 if wait_for > 0:
-#                     time.sleep(wait_for)
-#                 speaker = "Unknown"
-#                 for d_start, d_end, d_label in diar_segments:
-#                     if (seg.start < d_end) and (seg.end > d_start):
-#                         speaker = d_label
-#                         break
-#                 seg_obj = {
-#                     "start": float(seg.start),
-#                     "end": float(seg.end),
-#                     "speaker": speaker,
-#                     "text": seg.text.strip()
-#                 }
-#                 recording_status["live_segments"].append(seg_obj)
-#                 # --- NEW: also append to transcript file so /events SSE can stream it ---
-#                 line = f"{seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
-#                 with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
-#                     tf.write(line)
-#             recording_status["recording"] = False
-#         except Exception as e:
-#             print("Error in file transcription:", e)
-#             recording_status["recording"] = False
-#     threading.Thread(target=worker, daemon=True).start()
-#     return jsonify({"ok": True})
-def find_system_loopback_index():
-    """
-    Try to find a likely loopback / system audio input device.
-    Heuristics: look for device names that contain 'loop', 'stereo', 'mix', 'what u hear',
-    'virtual', 'audio cable'. Otherwise fallback to default input device.
-    """
-    pa = None
-    try:
-        import pyaudio
-        pa = pyaudio.PyAudio()
-    except Exception:
-        return None
-    keywords = ["loop", "stereo", "mix", "what u hear", "virtual", "audio cable", "loopback", "monitor"]
-    best_idx = None
-    for i in range(pa.get_device_count()):
-        try:
-            dev = pa.get_device_info_by_index(i)
-            name = (dev.get("name") or "").lower()
-            max_in = dev.get("maxInputChannels", 0)
-            if max_in <= 0:
-                continue
-            for kw in keywords:
-                if kw in name:
-                    best_idx = int(dev["index"])
-                    pa.terminate()
-                    return best_idx
-        except Exception:
-            continue
-    try:
-        default_info = pa.get_default_input_device_info()
-        idx = int(default_info.get("index"))
-        pa.terminate()
-        return idx
-    except Exception:
-        if pa:
-            pa.terminate()
-        return None
 @app.route("/api/start-transcribe-file", methods=["POST"])
 def api_start_transcribe_file():
-    """
-    Start a background thread which calls rec_transcribe_extension.run_recording(...)
-    We try to detect a loopback device; if not found we pick the default input device.
-    """
-    global recording_thread
-    body = request.get_json(force=True, silent=True) or {}
-    filename = body.get('filename')
-    # Basic check: uploaded file exists (we don't actually play the file on the server,
-    # but it's a sanity check so user didn't start without uploading)
-    if filename:
-        if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
-            return jsonify(success=False, error="Uploaded file not found on server"), 400
-    with recording_lock:
-        # if there's an active recording, return ok
-        if recording_thread and recording_thread.is_alive():
-            return jsonify(success=True, message="Recording already running")
-        # clear any previous stop_event
         try:
-            if hasattr(rte, 'stop_event'):
-                rte.stop_event = threading.Event()  # new event the run_recording will wait on
-        except Exception:
-            pass
-        # choose device: prefer loopback
-        dev_index = find_system_loopback_index()
-        if dev_index is None:
-            return jsonify(success=False, error="No suitable audio input device found on server"), 500
-        # Start the recording in a background thread
-        def target():
-            try:
-                from rec_transcribe_extension import chunk_writer_and_transcribe_worker
-                import rec_transcribe_extension as rte
-                orig_worker = rte.chunk_writer_and_transcribe_worker
-                def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
-                    while True:
-                        try:
-                            filename, frames = in_queue.get(timeout=1.0)
-                        except queue.Empty:
-                            if rte.stop_event.is_set() and in_queue.empty():
-                                break
-                            continue
-                        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
-                        final_frames_list.extend(frames)
-                        diar_segments = rte.diarization_hook(str(filename)) or []
-                        if transcriber and transcriber.model:
-                            try:
-                                segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
-                                for seg in segments:
-                                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
-                                    speaker = "Unknown"
-                                    for d_start, d_end, d_speaker in diar_segments:
-                                        if (seg_start < d_end) and (seg_end > d_start):
-                                            speaker = d_speaker
-                                            break
-                                    # Write diarized transcript line
-                                    line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
-                                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
-                                        tf.write(line)
-                            except Exception as e:
-                                print(f"Transcription error for {filename}: {e}")
-                    print("Patched worker exiting.")
-                # Apply patch
-                rte.chunk_writer_and_transcribe_worker = patched_worker
-                try:
-                    rte.run_recording(
-                        mic_index=dev_index,
-                        sys_index=None,
-                        chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
-                        model_name=getattr(rte, 'MODEL_NAME', None),
-                        no_transcribe=False
-                    )
-                finally:
-                    rte.chunk_writer_and_transcribe_worker = orig_worker
-            except Exception as e:
-                print("run_recording exception:", e)
-        recording_thread = threading.Thread(target=target, daemon=True)
-        recording_thread.start()
-        return jsonify(success=True, message="Recording started", device_index=dev_index)
-# @app.route("/static/<path:filename>")
-# def static_files(filename):
-#     return send_from_directory(OUTPUT_DIR, filename)
 @app.route("/stop", methods=["POST"])
 def stop_recording():
-    """
-    Signal the rec_transcribe_extension stop_event to stop gracefully.
-    """
-    global recording_thread
-    with recording_lock:
-        # set the stop_event in module
-        if hasattr(rte, 'stop_event') and rte.stop_event is not None:
-            try:
-                rte.stop_event.set()
-            except Exception:
-                pass
     return jsonify(success=True, message="Stop signal sent")
 def tail_transcript_file(path, stop_cond_fn=None):
-    """
-    Generator that tails the transcript file and yields SSE data lines.
-    If file doesn't exist yet, yield a short status message then keep waiting.
-    stop_cond_fn is a callable that when returns True will break.
-    """
     last_pos = 0
     sent_initial = False
     while True:
@@ -506,52 +370,66 @@ def tail_transcript_file(path, stop_cond_fn=None):
                     last_pos = fh.tell()
                     sent_initial = True
                 else:
-                    # no new lines
                     time.sleep(0.25)
         else:
             if not sent_initial:
                 yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                 sent_initial = True
             time.sleep(0.5)
-    # final notification
     yield "data: [info] Transcription ended.\n\n"
-# ---------------- SSE events (from app2) ----------------
 @app.route("/events")
 def events():
-    """
-    SSE endpoint that streams new transcript lines from rec_transcribe_extension.TRANSCRIPT_FILE.
-    The stream ends when the module stop_event is set and the background recording thread finishes.
-    """
-    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
-    if not transcript_path:
-        return Response("No transcript file configured", status=500)
-    transcript_path = str(transcript_path)
     def stop_fn():
-        # stop when the recording thread is no longer alive AND the module stop_event is set
         cond = False
         try:
-            cond = (hasattr(rte, 'stop_event')
-                    and rte.stop_event is not None and rte.stop_event.is_set())
         except Exception:
             cond = False
-        # also stop if thread finished
-        t_alive = recording_thread.is_alive() if recording_thread is not None else False
-        # If stop requested and thread not alive -> end stream
         return (cond and not t_alive)
-    return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)),
-                    mimetype="text/event-stream")
 @app.route("/status")
 def status():
     running = False
-    if recording_thread and recording_thread.is_alive():
-        running = True
     return jsonify(running=running)
-# ---------------- Run ----------------
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860, debug=True)

+# merged.py (production-ready for Docker / Hugging Face Spaces)
 import os
 import time
 import threading
 import queue
 import pathlib
+from pathlib import Path
 from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
 from werkzeug.utils import secure_filename
+# Try to import rec_transcribe_extension; we still rely on its utilities
+try:
+    import rec_transcribe_extension as rte
+    from rec_transcribe_extension import Transcriber, diarization_hook, run_recording
+except Exception as e:
+    # If the module import fails, keep rte=None and catch later to provide friendly error messages
+    rte = None
+    Transcriber = None
+    diarization_hook = None
+    run_recording = None
+    print("Warning: failed to import rec_transcribe_extension:", e)
+# ---- Environment-driven directories & config ----
+DEFAULT_OUTPUT = os.environ.get("OUTPUT_DIR", "/app/output_transcript_diarization")
+OUTPUT_DIR = Path(DEFAULT_OUTPUT)
+try:
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+except Exception as ex:
+    # fallback to /tmp if creation in the requested location fails (common in some runtimes)
+    OUTPUT_DIR = Path("/tmp/output_transcript_diarization")
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+# transcript file path used by SSE endpoint
+TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"
+# Ensure uploads dir exists (web uploads)
+UPLOAD_FOLDER = Path(os.environ.get("UPLOAD_FOLDER", "/app/uploads"))
+try:
+    UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
+except Exception:
+    UPLOAD_FOLDER = Path("/tmp/uploads")
+    UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
 ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
 def allowed_file(filename: str) -> bool:
     ext = pathlib.Path(filename).suffix.lower()
     return ext in ALLOWED_EXT
+# ---- Try to import pyaudio lazily and detect if host audio devices are accessible ----
+LIVE_RECORDING_SUPPORTED = False
+_pyaudio = None
+try:
+    import importlib
+    _pyaudio = importlib.import_module("pyaudio")
+    # attempt to instantiate PyAudio to confirm it's functional
+    try:
+        pa = _pyaudio.PyAudio()
+        # if there is at least one input device, consider live recording possible
+        has_input = any(pa.get_device_info_by_index(i).get("maxInputChannels", 0) > 0
+                        for i in range(pa.get_device_count()))
+        pa.terminate()
+        LIVE_RECORDING_SUPPORTED = bool(has_input)
+    except Exception as e:
+        LIVE_RECORDING_SUPPORTED = False
+        print("PyAudio imported but couldn't initialize audio devices:", e)
+except Exception:
+    # pyaudio not available
+    LIVE_RECORDING_SUPPORTED = False
+# ---- Flask app ----
+app = Flask(__name__, static_folder=None)
+app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER)
+# ---- Shared state ----
 recording_thread = None
 recording_lock = threading.Lock()
+recording_status = {"recording": False, "live_segments": []}
+# ---- Frontend routes ----
 @app.route("/")
 def landing():
     return render_template("landing.html")
 @app.route("/live")
 def live_page():
     return render_template("index2.html")
 @app.route("/upload")
 def upload_page():
     return render_template("index2_upload.html")
+# ---- Device listing (only if supported) ----
 @app.route("/api/devices", methods=["GET"])
 def api_devices():
+    if not LIVE_RECORDING_SUPPORTED:
+        return jsonify({"devices": [], "error": "Live recording not supported in this environment."}), 200
+    try:
+        pa = _pyaudio.PyAudio()
+        devices = []
+        for i in range(pa.get_device_count()):
+            dev = pa.get_device_info_by_index(i)
+            if dev.get("maxInputChannels", 0) > 0:
+                devices.append({"index": dev["index"], "name": dev["name"]})
+        pa.terminate()
+        return jsonify({"devices": devices})
+    except Exception as e:
+        return jsonify({"devices": [], "error": str(e)}), 500
+# ---- Start recording endpoint (guards if pyaudio unavailable) ----
 @app.route("/api/start-recording", methods=["POST"])
 def api_start_recording():
+    global recording_thread
+    if not LIVE_RECORDING_SUPPORTED or _pyaudio is None:
+        return jsonify({"error": "Live recording is not supported in this environment."}), 400
+    data = request.json or {}
     try:
         mic = int(data.get("mic"))
     except Exception:
         return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
     sys = None
     if data.get("sys") not in (None, "", "null"):
         try:
     chunk_secs = int(data.get("chunk_secs", 5))
     model = data.get("model", "medium")
     no_transcribe = bool(data.get("no_transcribe", False))
     if recording_status["recording"]:
         return jsonify({"error": "Already recording"}), 400
+    # validate devices using pyaudio
     try:
+        pa = _pyaudio.PyAudio()
     except Exception as e:
         return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
     pa.terminate()
+    # ready recording state
     recording_status["recording"] = True
     recording_status["live_segments"] = []
     stop_event = threading.Event()
     def run():
+        # monkey-patch worker if module supports it
+        if rte and hasattr(rte, "chunk_writer_and_transcribe_worker"):
+            import rec_transcribe_extension as rte_local
+            orig_worker = rte_local.chunk_writer_and_transcribe_worker
+            def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
+                while True:
                     try:
+                        filename, frames = in_queue.get(timeout=1.0)
+                    except queue.Empty:
+                        if stop_event.is_set() and in_queue.empty():
+                            break
+                        continue
+                    try:
+                        rte_local.save_wav_from_frames(filename, frames, nchannels=rte_local.CHANNELS)
+                    except Exception:
+                        # best-effort; continue
+                        pass
+                    # diarization and transcription
+                    diar_segments = []
+                    try:
+                        diar_segments = (rte_local.diarization_hook(str(filename)) or [])
+                    except Exception:
+                        diar_segments = []
+                    if transcriber and getattr(transcriber, "model", None):
+                        try:
+                            segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
+                            for seg in segments:
+                                seg_start = float(getattr(seg, "start", 0.0))
+                                seg_end = float(getattr(seg, "end", 0.0))
+                                seg_text = getattr(seg, "text", "").strip()
+                                speaker = "Unknown"
+                                for d_start, d_end, d_speaker in diar_segments:
+                                    if (seg_start < d_end) and (seg_end > d_start):
+                                        speaker = d_speaker
+                                        break
+                                recording_status["live_segments"].append({
+                                    "start": seg_start,
+                                    "end": seg_end,
+                                    "speaker": str(speaker),
+                                    "text": seg_text
+                                })
+                                # write to persistent transcript file
+                                try:
+                                    with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                                        tf.write(f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n")
+                                except Exception:
+                                    pass
+                        except Exception as e:
+                            print("Transcription error:", e)
+                # patched worker exit
+            rte_local.chunk_writer_and_transcribe_worker = patched_worker
+            try:
+                rte_local.stop_event = stop_event
+                rte_local.run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
+                                        model_name=model, no_transcribe=no_transcribe)
+            finally:
+                rte_local.chunk_writer_and_transcribe_worker = orig_worker
+        else:
+            # fallback: call run_recording if available without monkey patch
+            try:
+                if rte and hasattr(rte, "stop_event"):
+                    rte.stop_event = stop_event
+                if rte and hasattr(rte, "run_recording"):
+                    rte.run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
+                                      model_name=model, no_transcribe=no_transcribe)
+            except Exception as e:
+                print("run_recording error:", e)
+        recording_status["recording"] = False
+    recording_thread_local = threading.Thread(target=run, daemon=True)
+    recording_thread_local.start()
+    # store reference globally so stop logic can use it
+    global recording_thread
+    recording_thread = recording_thread_local
+    return jsonify({"ok": True})
+# Stop recording
 @app.route("/api/stop-recording", methods=["POST"])
 def api_stop_recording():
+    if rte and hasattr(rte, "stop_event") and rte.stop_event:
+        try:
+            rte.stop_event.set()
+        except Exception:
+            pass
     return jsonify({"ok": True})
+# recording status
 @app.route("/api/recording-status")
 def api_recording_status():
     return jsonify({
         "live_segments": recording_status.get("live_segments", [])
     })
+# ---- Upload endpoint (works in Spaces) ----
 @app.route("/api/upload", methods=["POST"])
 def api_upload_file():
     if 'file' not in request.files:
     filename = secure_filename(f.filename)
     if not allowed_file(filename):
         return jsonify(success=False, error="Extension not allowed"), 400
     ts = int(time.time() * 1000)
+    saved_name = f"{ts}_{filename}"
+    save_path = Path(app.config['UPLOAD_FOLDER']) / saved_name
+    try:
+        f.save(str(save_path))
+    except Exception as e:
+        return jsonify(success=False, error=f"Failed to save file: {e}"), 500
+    url = f"/uploads/{saved_name}"
+    return jsonify(success=True, url=url, filename=saved_name)
+# Serve uploaded files
 @app.route("/uploads/<path:filename>")
 def uploaded_file(filename):
     return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)
+# ---- Transcribe an uploaded file in a paced 'live' manner (works in Spaces) ----
 @app.route("/api/start-transcribe-file", methods=["POST"])
 def api_start_transcribe_file():
+    data = request.json or {}
+    filename = data.get("filename")
+    if not filename:
+        return jsonify({"error": "Missing filename"}), 400
+    file_path = OUTPUT_DIR / filename
+    # if file was uploaded to uploads folder, prefer that path
+    uploaded_path = Path(app.config['UPLOAD_FOLDER']) / filename
+    if uploaded_path.exists():
+        file_path = uploaded_path
+    if not file_path.exists():
+        return jsonify({"error": "File not found"}), 404
+    if recording_status.get("recording"):
+        return jsonify({"error": "Busy"}), 400
+    def worker():
         try:
+            recording_status["recording"] = True
+            recording_status["live_segments"] = []
+            transcriber = Transcriber() if Transcriber else None
+            diar_segments = diarization_hook(str(file_path)) if diarization_hook else []
+            if transcriber and getattr(transcriber, "model", None):
+                segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
+                start_clock = time.time()
+                for seg in segments:
+                    wait_for = seg.start - (time.time() - start_clock)
+                    if wait_for > 0:
+                        time.sleep(wait_for)
+                    speaker = "Unknown"
+                    for d_start, d_end, d_label in (diar_segments or []):
+                        if (seg.start < d_end) and (seg.end > d_start):
+                            speaker = d_label
+                            break
+                    seg_obj = {"start": float(seg.start), "end": float(seg.end), "speaker": speaker, "text": seg.text.strip()}
+                    recording_status["live_segments"].append(seg_obj)
+                    # append to transcript file for SSE streaming
+                    try:
+                        with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
+                            tf.write(f"[{file_path.name}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n")
+                    except Exception:
+                        pass
+            recording_status["recording"] = False
+        except Exception as e:
+            print("Error in file transcription:", e)
+            recording_status["recording"] = False
+    threading.Thread(target=worker, daemon=True).start()
+    return jsonify({"ok": True})
+# Stop (generic)
 @app.route("/stop", methods=["POST"])
 def stop_recording():
+    if rte and hasattr(rte, 'stop_event') and rte.stop_event is not None:
+        try:
+            rte.stop_event.set()
+        except Exception:
+            pass
     return jsonify(success=True, message="Stop signal sent")
+# SSE tailer
 def tail_transcript_file(path, stop_cond_fn=None):
     # Generator that yields server-sent-event strings ("data: ...\n\n").
     # NOTE(review): part of the loop body is collapsed in this hunk, so how
     # `path` and `stop_cond_fn` are consumed is not visible here -- confirm
     # against the full file before relying on these comments.
     # Offset recorded from fh.tell() below.
     last_pos = 0
     # Becomes True once something has been sent; guards the one-time
     # "not yet created" notice below.
     sent_initial = False
     while True:
                     last_pos = fh.tell()
                     sent_initial = True
                 else:
                     # Short pause before the next pass of the loop.
                     time.sleep(0.25)
         else:
             if not sent_initial:
                 yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                 sent_initial = True
             # Longer pause on this branch before the loop runs again.
             time.sleep(0.5)
     # Final message, emitted once the loop above has been left.
     yield "data: [info] Transcription ended.\n\n"
 @app.route("/events")
 def events():
+    transcript_path = str(TRANSCRIPT_FILE)
     def stop_fn():
         cond = False
         try:
+            cond = (rte and hasattr(rte, 'stop_event') and rte.stop_event is not None and rte.stop_event.is_set())
         except Exception:
             cond = False
+        t_alive = False
+        try:
+            t_alive = 'recording_thread' in globals() and recording_thread is not None and recording_thread.is_alive()
+        except Exception:
+            t_alive = False
         return (cond and not t_alive)
+    return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)), mimetype="text/event-stream")
 @app.route("/status")
 def status():
     running = False
+    try:
+        running = recording_status.get("recording", False)
+    except Exception:
+        running = False
     return jsonify(running=running)
+# Final-files listing (for UI)
+@app.route("/api/final-files")
+def api_final_files():
+    files = []
+    # list files from OUTPUT_DIR and uploads
+    try:
+        out_dir = OUTPUT_DIR
+        for fname in os.listdir(out_dir):
+            if fname.endswith(".wav") or fname.endswith(".txt"):
+                files.append({"name": fname, "path": f"/static/{fname}", "url": f"/static/{fname}"})
+    except Exception:
+        pass
+    # also list uploaded files
+    try:
+        for fname in os.listdir(app.config['UPLOAD_FOLDER']):
+            if fname.endswith(".wav") or fname.endswith(".mp3") or fname.endswith(".txt"):
+                files.append({"name": fname, "path": f"/uploads/{fname}", "url": f"/uploads/{fname}"})
+    except Exception:
+        pass
+    return jsonify({"files": files})
+# Serve static final-files from OUTPUT_DIR (if you want to expose them at /static/<file>)
+@app.route('/static/<path:filename>')
+def static_files(filename):
+    return send_from_directory(str(OUTPUT_DIR), filename)
+# Run only when debugging locally; in production we use gunicorn
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)), threaded=True)