File size: 11,395 Bytes
4207399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# app.py
"""

Flask app to:

1) serve the provided upload template,

2) accept audio uploads (.mp3, .wav, .m4a, .aac, .ogg) and show an audio player,

3) start/stop recording from a system loopback device when the audio element plays/pauses,

4) stream live transcription back to the browser via Server-Sent Events (SSE).



Notes:

- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.

- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.

- This app assumes it runs on the same machine that has access to the local audio devices.

"""
import os
import pathlib
import queue
import threading
import time

from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
from werkzeug.utils import secure_filename

# import your recorder/transcriber helper (uploaded by you)
import rec_transcribe_extension as rte

# Directory that receives uploaded audio files; it is created at import time
# if it does not exist yet.
# NOTE(review): this is a relative path, so os.makedirs / FileStorage.save
# resolve it against the process working directory, whereas
# flask.send_from_directory resolves relative directories against the
# application root path -- confirm both agree in the deployment layout.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Lower-case file suffixes (including the dot) accepted by allowed_file().
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}

# static_folder=None: no built-in static file route is registered; uploaded
# files are served through the explicit /uploads/<filename> route instead.
app = Flask(__name__, static_folder=None)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Globals for recording thread management
# recording_thread: handle of the background recording thread, or None.
recording_thread = None
# recording_lock: held by the /start and /stop handlers while they inspect or
# change the recording state.
recording_lock = threading.Lock()


def allowed_file(filename):
    """Tell whether *filename* ends in one of the accepted audio extensions.

    The final suffix (including the leading dot) is lower-cased and looked up
    in ``ALLOWED_EXT``, so the check is case-insensitive.
    """
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT


def find_system_loopback_index():
    """Return the index of a likely loopback / system-audio input device.

    Heuristic: the first device that has at least one input channel and whose
    lower-cased name contains one of the keywords 'loop', 'stereo', 'mix',
    'what u hear', 'virtual', 'audio cable', 'loopback' or 'monitor' wins.
    If no device matches, the default input device is used instead.

    Returns:
        The integer device index, or ``None`` when PyAudio cannot be imported
        or initialised, or when no default input device is available.
    """
    try:
        # Imported lazily so the web app can still start without PyAudio.
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        return None

    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "loopback", "monitor")
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                name = (dev.get("name") or "").lower()
                if dev.get("maxInputChannels", 0) <= 0:
                    # Output-only device: cannot be recorded from.
                    continue
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be queried is skipped, not fatal.
                continue

        # fallback: default input device
        try:
            default_info = pa.get_default_input_device_info()
            return int(default_info.get("index"))
        except Exception:
            return None
    finally:
        # Release the PortAudio handle on every exit path, including errors
        # raised while enumerating devices.
        pa.terminate()


@app.route("/", methods=["GET"])
def index():
    """Serve the upload page rendered from the ``index2_upload.html`` template."""
    page = render_template("index2_upload.html")
    return page


@app.route("/upload", methods=["POST"])
def upload():
    """Store an uploaded audio file and report where it can be fetched.

    Expects a multipart form field named ``file``. Responds with HTTP 400 and
    ``success=False`` when the field is missing, the filename is empty, or
    the extension is not accepted by ``allowed_file``. On success the file is
    saved under the upload folder with a millisecond-timestamp prefix and the
    JSON response carries ``success``, ``url`` and ``filename``.
    """
    if 'file' not in request.files:
        return jsonify(success=False, error="No file part"), 400

    incoming = request.files['file']
    if incoming.filename == '':
        return jsonify(success=False, error="Empty filename"), 400

    safe_name = secure_filename(incoming.filename)
    if not allowed_file(safe_name):
        return jsonify(success=False, error="Extension not allowed"), 400

    # A millisecond timestamp prefix keeps repeated uploads of the same
    # name from overwriting each other.
    stamp = int(time.time() * 1000)
    stored_name = f"{stamp}_{safe_name}"
    incoming.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))
    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)


@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline (not as a download attachment)."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)


def _speaker_for_segment(seg_start, seg_end, diar_segments):
    """Return the speaker of the first diarization span overlapping the segment, else "Unknown"."""
    for d_start, d_end, d_speaker in diar_segments:
        if (seg_start < d_end) and (seg_end > d_start):
            return d_speaker
    return "Unknown"


def _diarizing_chunk_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
    """Chunk worker that writes speaker-labelled transcript lines.

    Installed in place of ``rte.chunk_writer_and_transcribe_worker`` while a
    recording runs. For every ``(filename, frames)`` item taken from
    *in_queue* it saves the frames as a WAV file, appends them to
    *final_frames_list*, asks ``rte.diarization_hook`` for speaker spans and,
    when *transcriber* has a model, appends one line per transcribed segment
    to ``rte.TRANSCRIPT_FILE``.

    The loop ends once ``rte.stop_event`` is set and the queue is empty.
    *single_channel_label* is accepted for signature compatibility with the
    worker being replaced and is not used here.
    """
    while True:
        try:
            filename, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            # Only exit after a stop was requested AND the backlog is drained.
            if rte.stop_event.is_set() and in_queue.empty():
                break
            continue

        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
        final_frames_list.extend(frames)

        diar_segments = rte.diarization_hook(str(filename)) or []

        if not (transcriber and transcriber.model):
            continue

        try:
            segments, _info = transcriber.model.transcribe(
                str(filename), beam_size=5)
            chunk_name = pathlib.Path(filename).name
            for seg in segments:
                seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                speaker = _speaker_for_segment(seg_start, seg_end, diar_segments)
                # Write formatted diarization line
                line = f"[{chunk_name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                # Re-open per segment so each line reaches disk immediately
                # for the /events tailer.
                with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                    tf.write(line)
        except Exception as e:
            print(f"Transcription error for {filename}: {e}")

    print("Patched worker exiting.")


def _record_in_background(dev_index):
    """Thread body: run ``rte.run_recording`` with the diarizing worker patched in.

    The original worker is restored when the recording returns or fails.
    Exceptions are printed rather than raised because nothing joins this
    thread to receive them.
    """
    try:
        orig_worker = rte.chunk_writer_and_transcribe_worker
        rte.chunk_writer_and_transcribe_worker = _diarizing_chunk_worker
        try:
            rte.run_recording(mic_index=dev_index, sys_index=None,
                              chunk_secs=getattr(
                                  rte, 'CHUNK_DURATION_SECS', 3),
                              model_name=getattr(rte, 'MODEL_NAME', None),
                              no_transcribe=False)
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
    except Exception as e:
        print("run_recording exception:", e)


@app.route("/start", methods=["POST"])
def start_recording():
    """Start the background recording/transcription thread.

    The optional JSON body may carry ``filename``, the name returned by
    ``/upload``; when present it must be a plain file name that exists in the
    upload folder, otherwise HTTP 400 is returned.

    A loopback-like input device is preferred, falling back to the default
    input device; HTTP 500 is returned when none is available. If a recording
    thread is already alive the call succeeds without starting another one.

    Returns:
        A JSON response with ``success`` and either ``message`` or ``error``.
    """
    global recording_thread
    body = request.get_json(force=True, silent=True) or {}
    if not isinstance(body, dict):
        # A JSON array or scalar carries no usable options.
        body = {}
    filename = body.get('filename')

    # Basic check: uploaded file exists (we don't actually play the file on the server,
    # but it's a sanity check so user didn't start without uploading)
    if filename:
        # Reject anything that is not a bare file name so the check cannot be
        # used to probe paths outside the upload folder.
        is_plain_name = isinstance(filename, str) and os.path.basename(filename) == filename
        if not is_plain_name or not os.path.isfile(
                os.path.join(app.config['UPLOAD_FOLDER'], filename)):
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # if there's an active recording, return ok
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # Replace any previously set stop_event with a fresh, unset one.
        if hasattr(rte, 'stop_event'):
            rte.stop_event = threading.Event()

        # choose device: prefer loopback
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Start the recording in a background thread. It is a daemon thread so
        # a recording that was never stopped cannot keep the process alive.
        recording_thread = threading.Thread(
            target=_record_in_background,
            args=(dev_index,),
            name="recording",
            daemon=True,
        )
        recording_thread.start()

    return jsonify(success=True, message="Recording started")


@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the recorder to stop by setting ``rte.stop_event``, when one exists.

    The event is set while holding ``recording_lock``; any error raised while
    setting it is ignored. The response is always a success payload saying
    that the stop signal was sent.
    """
    with recording_lock:
        stop_signal = getattr(rte, 'stop_event', None)
        if stop_signal is not None:
            try:
                stop_signal.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")


def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file and yield its lines as Server-Sent Events.

    Each non-blank line appended to *path* is yielded, stripped, as
    ``"data: <line>\\n\\n"``. While the file does not exist a single
    informational event is sent and the generator keeps polling.

    Args:
        path: Path of the transcript file to follow.
        stop_cond_fn: Optional zero-argument callable. Once it returns a true
            value the file is read one last time, so lines written just
            before the stop are still delivered, and the generator finishes.

    Yields:
        SSE-formatted strings; the last one is always
        ``"data: [info] Transcription ended.\\n\\n"``.
    """
    last_pos = 0
    sent_initial = False
    while True:
        # Sample the stop condition BEFORE reading: anything written up to
        # the moment the condition turned true is picked up by this pass.
        stopping = bool(stop_cond_fn and stop_cond_fn())

        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as fh:
                fh.seek(last_pos)
                lines = fh.readlines()
                if lines:
                    last_pos = fh.tell()
        except FileNotFoundError:
            lines = None  # file not created yet (or removed)

        if lines is None:
            if stopping:
                break
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
            continue

        if lines:
            sent_initial = True
            for ln in lines:
                ln = ln.strip()
                if ln:
                    yield f"data: {ln}\n\n"

        if stopping:
            break
        if not lines:
            # no new lines
            time.sleep(0.25)

    # final notification
    yield "data: [info] Transcription ended.\n\n"


@app.route("/events")
def events():
    """Stream new transcript lines to the browser as Server-Sent Events.

    The lines come from tailing ``rte.TRANSCRIPT_FILE``; HTTP 500 is returned
    when the module does not configure one. The stream ends once the module's
    stop_event is set and no recording thread is alive.
    """
    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
    if not transcript_path:
        return Response("No transcript file configured", status=500)
    transcript_path = str(transcript_path)

    def stop_fn():
        # True only when a stop was requested AND the recorder has finished.
        try:
            stop_signal = getattr(rte, 'stop_event', None)
            stop_requested = stop_signal is not None and stop_signal.is_set()
        except Exception:
            stop_requested = False
        thread_running = recording_thread is not None and recording_thread.is_alive()
        return stop_requested and not thread_running

    stream = tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)
    return Response(stream_with_context(stream), mimetype="text/event-stream")


@app.route("/status")
def status():
    """Report as JSON (``{"running": bool}``) whether a recording thread is alive."""
    worker = recording_thread
    return jsonify(running=bool(worker and worker.is_alive()))


if __name__ == "__main__":
    # Script entry point. host="0.0.0.0" binds every network interface (not
    # only localhost), so the app is reachable from other machines on port
    # 7860. threaded=True handles each request in its own thread, which the
    # long-lived /events stream relies on so it does not block other requests.
    app.run(host="0.0.0.0", port=7860, threaded=True)