# Source: Hugging Face Space "prthm11/AudioTransDiar" (status: Sleeping)
# File size: 11,395 Bytes
# app.py
"""
Flask app to:
1) serve the provided upload template,
2) accept audio uploads (.mp3, .wav, .m4a, .aac, .ogg) and show an audio player,
3) start/stop recording from a system loopback device when the audio element plays/pauses,
4) stream live transcription back to the browser via Server-Sent Events (SSE).

Notes:
- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
- This app assumes it runs on the same machine that has access to the local audio devices.
"""
import os
import pathlib
import queue
import threading
import time

from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
from werkzeug.utils import secure_filename

# import your recorder/transcriber helper (uploaded by you)
import rec_transcribe_extension as rte

# Directory (relative to the working directory) where uploads are stored.
# It is created at import time so request handlers can rely on it existing.
UPLOAD_FOLDER = "uploads"
pathlib.Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)

# Lower-case file suffixes (leading dot included) accepted by /upload.
ALLOWED_EXT = {".mp3", ".wav", ".m4a", ".aac", ".ogg"}

# The Flask application; the upload directory is also exposed through
# app.config so handlers read it from one place.
app = Flask(__name__, static_folder=None)
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)

# State shared between request handlers: the lock that serialises start/stop
# requests and the handle of the background recording thread (None while no
# recording thread has been created).
recording_lock = threading.Lock()
recording_thread = None


def allowed_file(filename):
    """Return True when *filename* ends in one of the ALLOWED_EXT suffixes.

    The suffix is lower-cased before the lookup, so the check is
    case-insensitive.
    """
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT


def find_system_loopback_index():
    """Return the index of a likely loopback / system-audio input device.

    Heuristic: the first device that has at least one input channel and whose
    lower-cased name contains one of the keywords below ('loop', 'stereo',
    'mix', 'what u hear', 'virtual', 'audio cable', 'loopback', 'monitor')
    wins. If none matches, the default input device is used.

    Returns:
        The integer device index, or None when PyAudio cannot be imported or
        initialised, or when no default input device can be determined.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # PyAudio missing or unusable: the caller treats None as "no device".
        return None

    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "loopback", "monitor")

    # A single finally releases the PyAudio handle on every exit path,
    # including an exception raised while counting devices.
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue  # output-only device
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be inspected is skipped, not fatal.
                continue

        # Fallback: default input device.
        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        pa.terminate()


@app.route("/", methods=["GET"])
def index():
    """Serve the upload page rendered from the ``index2_upload.html`` template."""
    page = render_template("index2_upload.html")
    return page


@app.route("/upload", methods=["POST"])
def upload():
    """Store an uploaded audio file and return its URL as JSON.

    Expects a multipart form field named ``file``. Responds with HTTP 400 and
    ``success=False`` when the field is missing, the filename is empty, or the
    sanitised filename does not have an allowed extension. On success the
    file is saved under the upload folder with a millisecond-timestamp prefix
    and the response carries ``url`` and ``filename``.
    """
    incoming = request.files.get('file')
    if incoming is None:
        return jsonify(success=False, error="No file part"), 400
    if incoming.filename == '':
        return jsonify(success=False, error="Empty filename"), 400

    safe_name = secure_filename(incoming.filename)
    if not allowed_file(safe_name):
        return jsonify(success=False, error="Extension not allowed"), 400

    # The timestamp prefix keeps repeated uploads of the same name apart.
    stamp = int(time.time() * 1000)
    stored_name = f"{stamp}_{safe_name}"
    incoming.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))
    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)


@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline (not as a download attachment)."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)


def _diarizing_chunk_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
    """Chunk worker that writes speaker-labelled transcript lines.

    Installed in place of ``rte.chunk_writer_and_transcribe_worker`` while a
    recording runs. For every ``(filename, frames)`` item taken from
    *in_queue* it saves the frames as a WAV file, appends them to
    *final_frames_list*, runs ``rte.diarization_hook`` on the file and, when a
    transcription model is available, appends one line per transcribed
    segment to ``rte.TRANSCRIPT_FILE``.

    The loop ends once ``rte.stop_event`` is set and the queue is empty.

    Args:
        in_queue: Queue delivering ``(filename, frames)`` tuples.
        final_frames_list: List extended in place with each chunk's frames.
        transcriber: Object whose ``model.transcribe(path, beam_size=5)``
            returns ``(segments, info)``; transcription is skipped when the
            object or its ``model`` attribute is falsy.
        single_channel_label: Not used here; presumably kept so the signature
            matches the worker being replaced (verify against
            rec_transcribe_extension).
    """
    while True:
        try:
            chunk_path, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            # Wake up once a second so a stop request is noticed promptly.
            if rte.stop_event.is_set() and in_queue.empty():
                break
            continue

        rte.save_wav_from_frames(chunk_path, frames, nchannels=rte.CHANNELS)
        final_frames_list.extend(frames)

        # Each diarization entry is unpacked below as (start, end, speaker).
        diar_segments = rte.diarization_hook(str(chunk_path)) or []

        if not (transcriber and transcriber.model):
            continue

        try:
            segments, _info = transcriber.model.transcribe(
                str(chunk_path), beam_size=5)
            for seg in segments:
                seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                # Label the segment with the first diarization span that
                # overlaps it in time; "Unknown" when none does.
                speaker = next(
                    (d_speaker for d_start, d_end, d_speaker in diar_segments
                     if seg_start < d_end and seg_end > d_start),
                    "Unknown",
                )
                line = f"[{pathlib.Path(chunk_path).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                # Re-opened per segment so each line reaches the file (and
                # anything tailing it) as soon as it is produced.
                with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                    tf.write(line)
        except Exception as e:
            # A failed chunk must not end the whole recording session.
            print(f"Transcription error for {chunk_path}: {e}")

    print("Patched worker exiting.")


def _record_with_diarizing_worker(dev_index):
    """Thread target: run ``rte.run_recording`` with the diarizing worker installed.

    The module-level worker in ``rec_transcribe_extension`` is swapped for
    ``_diarizing_chunk_worker`` for the duration of the recording and restored
    afterwards, even when the recording raises. Any exception is printed
    rather than propagated, since nothing can catch it in a background thread.
    """
    try:
        orig_worker = rte.chunk_writer_and_transcribe_worker
        rte.chunk_writer_and_transcribe_worker = _diarizing_chunk_worker
        try:
            rte.run_recording(mic_index=dev_index, sys_index=None,
                              chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
                              model_name=getattr(rte, 'MODEL_NAME', None),
                              no_transcribe=False)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
    except Exception as e:
        print("run_recording exception:", e)


@app.route("/start", methods=["POST"])
def start_recording():
    """Start the background recording/transcription thread.

    Accepts an optional JSON body ``{"filename": <uploaded file name>}``; when
    given, the file must exist in the upload folder (a sanity check that the
    user uploaded something before pressing play).

    A loopback device is preferred; otherwise the default input device is
    used (see ``find_system_loopback_index``).

    Returns:
        JSON with ``success=True`` when a recording was started or is already
        running; HTTP 400 for an invalid or unknown filename; HTTP 500 when no
        input device is available.
    """
    global recording_thread
    body = request.get_json(force=True, silent=True) or {}
    if not isinstance(body, dict):
        # e.g. a JSON list: treat it as "no options supplied".
        body = {}
    filename = body.get('filename')

    # Basic check: uploaded file exists (we don't actually play the file on
    # the server, but it's a sanity check so user didn't start without uploading).
    if filename:
        # Names handed out by /upload are already sanitised, so anything that
        # changes under secure_filename (path separators, "..") is rejected.
        if not isinstance(filename, str) or secure_filename(filename) != filename:
            return jsonify(success=False, error="Invalid filename"), 400
        if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # If there's an active recording, report success without starting another.
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # Choose device: prefer loopback.
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Fresh stop_event so a previous /stop does not end the new run
        # immediately. Done only after a device was found, so a failed start
        # leaves the previous state untouched.
        if hasattr(rte, 'stop_event'):
            rte.stop_event = threading.Event()

        # Daemon thread: an in-progress recording must not keep the server
        # process alive on shutdown.
        recording_thread = threading.Thread(
            target=_record_with_diarizing_worker,
            args=(dev_index,),
            name="recording",
            daemon=True,
        )
        recording_thread.start()

    return jsonify(success=True, message="Recording started", device_index=dev_index)

@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the running recording to stop by setting ``rte.stop_event``.

    The request only signals; it does not wait for the recording thread to
    finish. The response is always ``success=True``.
    """
    with recording_lock:
        stop_evt = getattr(rte, 'stop_event', None)
        if stop_evt is not None:
            try:
                stop_evt.set()
            except Exception:
                # Best effort: a failure to signal is deliberately ignored.
                pass
    return jsonify(success=True, message="Stop signal sent")


def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file and yield its lines as SSE ``data:`` events.

    The file is polled from the position reached by the previous read. Blank
    lines are skipped. While the file does not exist yet, a single status
    message is emitted and polling continues.

    Args:
        path: Path of the transcript file to follow.
        stop_cond_fn: Optional zero-argument callable; once it returns a true
            value the stream ends. If it is already true on the first check
            the stream ends immediately without reading the file. Otherwise
            one last read is performed after the condition turns true, so
            lines written since the previous poll are not lost.

    Yields:
        ``"data: <line>\\n\\n"`` strings, ending with a final
        ``[info] Transcription ended.`` event.
    """
    last_pos = 0
    sent_initial = False
    polled_once = False
    while True:
        stopping = bool(stop_cond_fn and stop_cond_fn())
        if stopping and not polled_once:
            break
        polled_once = True

        # EAFP: the file may appear or vanish between polls. The handle is
        # closed before yielding so it is never held across a suspension.
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                fh.seek(last_pos)
                lines = fh.readlines()
                last_pos = fh.tell()
        except FileNotFoundError:
            lines = None  # None means "file missing", [] means "nothing new"

        if lines:
            sent_initial = True
            for ln in lines:
                ln = ln.strip()
                if ln:
                    yield f"data: {ln}\n\n"

        if stopping:
            # The read above was the final drain.
            break

        if lines is None:
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
        elif not lines:
            # no new lines
            time.sleep(0.25)
    # final notification
    yield "data: [info] Transcription ended.\n\n"


@app.route("/events")
def events():
    """SSE endpoint streaming new lines of ``rte.TRANSCRIPT_FILE``.

    Responds with HTTP 500 when the module exposes no transcript path.
    Otherwise the stream runs until a stop has been requested (the module
    ``stop_event`` is set) and the background recording thread is no longer
    alive.
    """
    configured = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured:
        return Response("No transcript file configured", status=500)

    def stop_fn():
        # True only when a stop was requested AND the recording thread is gone.
        try:
            evt = getattr(rte, 'stop_event', None)
            stop_requested = evt is not None and evt.is_set()
        except Exception:
            stop_requested = False
        worker = recording_thread
        worker_alive = worker is not None and worker.is_alive()
        return stop_requested and not worker_alive

    stream = tail_transcript_file(str(configured), stop_cond_fn=stop_fn)
    return Response(stream_with_context(stream), mimetype="text/event-stream")


@app.route("/status")
def status():
    """Report as JSON whether the background recording thread is alive."""
    worker = recording_thread
    return jsonify(running=bool(worker and worker.is_alive()))


if __name__ == "__main__":
    # Development entry point. NOTE: host="0.0.0.0" binds every network
    # interface, so the app is reachable from other machines, not only from
    # localhost. threaded=True is presumably there so a long-lived /events
    # stream does not block other requests (confirm before changing).
    app.run(host="0.0.0.0", port=7860, threaded=True)