# app.py
"""
Flask app to:
1) serve the provided upload template,
2) accept audio uploads (.mp3/.wav/.m4a/.aac/.ogg) and show an audio player,
3) start/stop recording from a system loopback device when the audio element
   plays/pauses,
4) stream live transcription back to the browser via Server-Sent Events (SSE).

Notes:
- Uses rec_transcribe_extension.run_recording to capture audio and
  (optionally) transcribe chunks.
- Transcription streaming is implemented by tailing
  rec_transcribe_extension.TRANSCRIPT_FILE.
- This app assumes it runs on the same machine that has access to the local
  audio devices.
"""
import os
import pathlib
import queue
import threading
import time

from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
from werkzeug.utils import secure_filename

# import your recorder/transcriber helper (uploaded by you)
import rec_transcribe_extension as rte

UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}

# Substrings (lower-case) that suggest an input device is a loopback /
# "what you hear" style capture of system audio.
_LOOPBACK_KEYWORDS = (
    "loop", "stereo", "mix", "what u hear",
    "virtual", "audio cable", "loopback", "monitor",
)

app = Flask(__name__, static_folder=None)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Globals for recording thread management
recording_thread = None
recording_lock = threading.Lock()


def allowed_file(filename):
    """Return True if *filename* has one of the extensions in ALLOWED_EXT."""
    ext = pathlib.Path(filename).suffix.lower()
    return ext in ALLOWED_EXT


def find_system_loopback_index():
    """
    Try to find a likely loopback / system audio input device.

    Heuristic: pick the first device that has input channels and whose name
    contains one of _LOOPBACK_KEYWORDS. Otherwise fall back to the default
    input device.

    Returns:
        The PyAudio device index, or None when PyAudio cannot be
        imported/initialised or no input device is available.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # PyAudio missing or the audio backend failed to initialise.
        return None

    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                if dev.get("maxInputChannels", 0) <= 0:
                    continue  # output-only device
                name = (dev.get("name") or "").lower()
                if any(kw in name for kw in _LOOPBACK_KEYWORDS):
                    return int(dev["index"])
            except Exception:
                # Best-effort probing: a device that cannot be queried is skipped.
                continue

        # fallback: default input device
        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        # Always release the PyAudio handle, whichever path returned.
        pa.terminate()


@app.route("/", methods=["GET"])
def index():
    """Serve the upload/player page."""
    return render_template("index2_upload.html")


@app.route("/upload", methods=["POST"])
def upload():
    """
    Accept a multipart upload in the 'file' field and store it in UPLOAD_FOLDER.

    Responds with JSON: on success ``success=True`` plus the ``url`` and stored
    ``filename``; otherwise ``success=False`` with an ``error`` and HTTP 400.
    """
    if 'file' not in request.files:
        return jsonify(success=False, error="No file part"), 400
    f = request.files['file']
    if f.filename == '':
        return jsonify(success=False, error="Empty filename"), 400
    filename = secure_filename(f.filename)
    if not allowed_file(filename):
        return jsonify(success=False, error="Extension not allowed"), 400
    # avoid collisions by prefixing timestamp
    ts = int(time.time() * 1000)
    filename = f"{ts}_{filename}"
    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    f.save(save_path)
    url = f"/uploads/{filename}"
    return jsonify(success=True, url=url, filename=filename)


# The URL variable is required: without it Flask never passes ``filename`` to
# the view and the URLs handed out by /upload cannot be resolved.
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve a previously uploaded file inline (not as an attachment)."""
    return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)


def _diarized_chunk_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
    """
    Replacement for rte.chunk_writer_and_transcribe_worker.

    Consumes ``(filename, frames)`` items from *in_queue*; for each one it
    saves the frames as a WAV file, appends them to *final_frames_list*,
    runs rte.diarization_hook on the file and, when *transcriber* has a model,
    appends one formatted line per transcribed segment to rte.TRANSCRIPT_FILE.

    The loop exits once rte.stop_event is set and the queue is empty.
    *single_channel_label* is accepted for signature compatibility and unused.
    """
    while True:
        try:
            filename, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            if rte.stop_event.is_set() and in_queue.empty():
                break
            continue

        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
        final_frames_list.extend(frames)

        # NOTE(review): assumes diarization_hook yields (start, end, speaker)
        # tuples, as implied by the unpacking below - confirm against rte.
        diar_segments = rte.diarization_hook(str(filename)) or []

        if not (transcriber and transcriber.model):
            continue
        try:
            segments, _info = transcriber.model.transcribe(str(filename), beam_size=5)
            for seg in segments:
                seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                # First diarization segment that overlaps this transcript segment.
                speaker = next(
                    (
                        d_speaker
                        for d_start, d_end, d_speaker in diar_segments
                        if seg_start < d_end and seg_end > d_start
                    ),
                    "Unknown",
                )
                # Write formatted diarization line; written per segment so the
                # SSE tail can pick lines up as soon as they are produced.
                line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                    tf.write(line)
        except Exception as e:
            print(f"Transcription error for {filename}: {e}")
    print("Patched worker exiting.")


def _run_recording_job(dev_index):
    """
    Thread target: run rte.run_recording with the diarizing worker patched in.

    The original rte.chunk_writer_and_transcribe_worker is restored when the
    recording returns or fails. Exceptions are reported and swallowed so they
    do not escape the background thread.
    """
    try:
        orig_worker = rte.chunk_writer_and_transcribe_worker
        # apply patch
        rte.chunk_writer_and_transcribe_worker = _diarized_chunk_worker
        try:
            rte.run_recording(
                mic_index=dev_index,
                sys_index=None,
                chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
                model_name=getattr(rte, 'MODEL_NAME', None),
                no_transcribe=False,
            )
        finally:
            rte.chunk_writer_and_transcribe_worker = orig_worker
    except Exception as e:
        print("run_recording exception:", e)


@app.route("/start", methods=["POST"])
def start_recording():
    """
    Start a background thread which calls rec_transcribe_extension.run_recording(...)

    We try to detect a loopback device; if not found we pick the default
    input device. The optional JSON field ``filename`` is only a sanity check
    that an upload exists; the file is not played on the server.

    Responds with JSON ``success=True`` when a recording was started or is
    already running, HTTP 400 when the named upload does not exist, and
    HTTP 500 when no input device could be found.
    """
    global recording_thread
    body = request.get_json(force=True, silent=True) or {}
    filename = body.get('filename')

    # Basic check: uploaded file exists (so the user didn't start without
    # uploading). The value comes from the client, so only a plain file name
    # inside the upload folder is accepted.
    if filename:
        is_plain_name = isinstance(filename, str) and os.path.basename(filename) == filename
        if not is_plain_name or not os.path.isfile(
            os.path.join(app.config['UPLOAD_FOLDER'], filename)
        ):
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # if there's an active recording, return ok
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # clear any previous stop_event
        # NOTE(review): presumably run_recording reads rte.stop_event at call
        # time rather than capturing the old object - confirm against rte.
        if hasattr(rte, 'stop_event'):
            rte.stop_event = threading.Event()

        # choose device: prefer loopback
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Start the recording in a background thread. Daemon so that a running
        # recording does not keep the server process alive on shutdown.
        recording_thread = threading.Thread(
            target=_run_recording_job,
            args=(dev_index,),
            name="recording",
            daemon=True,
        )
        recording_thread.start()

    return jsonify(success=True, message="Recording started", device_index=dev_index)


@app.route("/stop", methods=["POST"])
def stop_recording():
    """
    Signal the rec_transcribe_extension stop_event to stop gracefully.

    Always responds with ``success=True``; the request only sets the event
    and does not wait for the recording thread to finish.
    """
    with recording_lock:
        # set the stop_event in module
        stop_event = getattr(rte, 'stop_event', None)
        if stop_event is not None:
            stop_event.set()
    return jsonify(success=True, message="Stop signal sent")


def _read_new_lines(path, offset, include_partial=False):
    """
    Read transcript text appended to *path* since byte position *offset*.

    Only complete (newline-terminated) lines are consumed unless
    *include_partial* is true, so a line that is still being written is not
    emitted in two pieces. If the file is shorter than *offset* (it was
    truncated or recreated) reading restarts from the beginning.

    Returns:
        ``(lines, new_offset)`` where *lines* is a list of stripped, non-empty
        lines, or ``(None, offset)`` when the file does not exist.
    """
    try:
        with open(path, "rb") as fh:
            fh.seek(0, os.SEEK_END)
            if fh.tell() < offset:
                offset = 0
            fh.seek(offset)
            chunk = fh.read()
    except FileNotFoundError:
        return None, offset

    if not include_partial:
        # Keep everything up to and including the last newline only.
        chunk = chunk[:chunk.rfind(b"\n") + 1]
    offset += len(chunk)

    text = chunk.decode("utf-8", errors="ignore")
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    lines = [ln for ln in (raw.strip() for raw in text.split("\n")) if ln]
    return lines, offset


def tail_transcript_file(path, stop_cond_fn=None):
    """
    Generator that tails the transcript file and yields SSE data lines.

    If the file doesn't exist yet, yield a short status message once and keep
    waiting. *stop_cond_fn* is a callable; when it returns True the tail
    stops. Lines written since the last poll are flushed before the closing
    notification so the end of the transcript is not lost.
    """
    offset = 0
    sent_initial = False
    streamed = False
    while not (stop_cond_fn and stop_cond_fn()):
        streamed = True
        lines, offset = _read_new_lines(path, offset)
        if lines is None:
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
        elif lines:
            for ln in lines:
                yield f"data: {ln}\n\n"
            sent_initial = True
        else:
            # no new lines
            time.sleep(0.25)

    # Final drain, only when this stream was actually live: a client that
    # connects after the recording has ended just gets the closing message.
    if streamed:
        lines, offset = _read_new_lines(path, offset, include_partial=True)
        for ln in lines or ():
            yield f"data: {ln}\n\n"

    # final notification
    yield "data: [info] Transcription ended.\n\n"


@app.route("/events")
def events():
    """
    SSE endpoint that streams new transcript lines from
    rec_transcribe_extension.TRANSCRIPT_FILE.

    The stream ends when the module stop_event is set and the background
    recording thread is no longer alive. Responds with HTTP 500 when the
    module does not define TRANSCRIPT_FILE.
    """
    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
    if not transcript_path:
        return Response("No transcript file configured", status=500)
    transcript_path = str(transcript_path)

    def stop_fn():
        # End the stream only when a stop was requested AND the recording
        # thread has finished (or was never started).
        stop_event = getattr(rte, 'stop_event', None)
        stop_requested = stop_event is not None and stop_event.is_set()
        thread_alive = recording_thread is not None and recording_thread.is_alive()
        return stop_requested and not thread_alive

    return Response(
        stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)),
        mimetype="text/event-stream",
        headers={"Cache-Control": "no-cache"},
    )


@app.route("/status")
def status():
    """Report as JSON whether the background recording thread is alive."""
    running = recording_thread is not None and recording_thread.is_alive()
    return jsonify(running=running)


if __name__ == "__main__":
    # Intended for local usage. NOTE: host 0.0.0.0 listens on all network
    # interfaces, not just localhost; use "127.0.0.1" to restrict access.
    app.run(host="0.0.0.0", port=7860, threaded=True)