Spaces:
Sleeping
Sleeping
# app.py
"""
Flask app to:
1) serve the provided upload template,
2) accept audio uploads (.mp3, .wav, .m4a, .aac, .ogg) and show an audio player,
3) start/stop recording from a system loopback device when the audio element plays/pauses,
4) stream live transcription back to the browser via Server-Sent Events (SSE).

Notes:
- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
- This app assumes it runs on the same machine that has access to the local audio devices.
"""
import os
import pathlib
import queue
import threading
import time

from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
from werkzeug.utils import secure_filename

# import your recorder/transcriber helper (uploaded by you)
import rec_transcribe_extension as rte
# Directory (relative to the working directory) that receives uploaded audio.
# It is created at import time so the upload handler can save into it directly.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Lower-case file suffixes the upload handler accepts.
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}

# static_folder=None: do not register Flask's built-in static file route.
app = Flask(__name__, static_folder=None)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Recording state shared between request handlers: the background thread
# (None until a recording has been started) and the lock that guards it.
recording_thread = None
recording_lock = threading.Lock()
def allowed_file(filename):
    """Return True when *filename* ends in one of the suffixes in ALLOWED_EXT.

    The suffix is lower-cased before the lookup, so the check is
    case-insensitive.
    """
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
def find_system_loopback_index():
    """Return the index of a likely loopback / system-audio input device.

    Heuristic: the first input-capable device whose lower-cased name contains
    one of a few well-known substrings ('loop', 'stereo', 'mix', ...) wins.
    If none matches, the default input device is used instead.

    Returns:
        The device index as an int, or None when PyAudio cannot be imported
        or initialised, or when no default input device can be queried.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # PyAudio is missing or failed to initialise: nothing to probe.
        return None

    # 'loop' already covers 'loopback', so the latter is not listed separately.
    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "monitor")

    # A single finally releases the PyAudio instance exactly once on every
    # exit path (match, fallback, failure or an unexpected exception).
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                name = (dev.get("name") or "").lower()
                if dev.get("maxInputChannels", 0) <= 0:
                    continue  # output-only device
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # Best effort: a device that cannot be inspected is skipped.
                continue

        # Fallback: default input device.
        try:
            return int(pa.get_default_input_device_info()["index"])
        except Exception:
            return None
    finally:
        pa.terminate()
def index():
    """Render the upload page from the ``index2_upload.html`` template."""
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    page = render_template("index2_upload.html")
    return page
def upload():
    """Store an uploaded audio file and return its URL as JSON.

    Expects a multipart form field named ``file``. Responds with HTTP 400 and
    ``success=False`` when the field is missing, the filename is empty, or the
    sanitised filename has an extension that ``allowed_file`` rejects.
    On success the file is saved under the upload folder with a millisecond
    timestamp prefix and the JSON body carries ``url`` and ``filename``.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.

    def reject(reason):
        return jsonify(success=False, error=reason), 400

    if 'file' not in request.files:
        return reject("No file part")

    uploaded = request.files['file']
    if uploaded.filename == '':
        return reject("Empty filename")

    safe_name = secure_filename(uploaded.filename)
    if not allowed_file(safe_name):
        return reject("Extension not allowed")

    # A timestamp prefix keeps repeated uploads of the same name from colliding.
    stamp = int(time.time() * 1000)
    stored_name = f"{stamp}_{safe_name}"
    uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))

    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)
def uploaded_file(filename):
    """Serve *filename* from the upload folder inline (not as an attachment)."""
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    folder = app.config['UPLOAD_FOLDER']
    return send_from_directory(folder, filename, as_attachment=False)
def start_recording():
    """Start the background recording/transcription thread.

    Reads an optional ``filename`` from the JSON request body; when given, the
    file must exist in the upload folder (a sanity check only -- the server
    does not play the file). If a recording thread is already alive the call
    is a no-op that reports success. Otherwise a fresh stop event is
    installed on ``rec_transcribe_extension`` (when the module exposes one),
    an input device is picked with ``find_system_loopback_index`` and a daemon
    thread running ``rte.run_recording`` is started.

    Returns:
        A JSON response: HTTP 400 if the named upload is missing, HTTP 500 if
        no input device is available, otherwise ``success=True``.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    global recording_thread

    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        # Missing, invalid or non-object JSON is treated as "no parameters".
        body = {}
    filename = body.get('filename')

    if filename and not os.path.exists(
            os.path.join(app.config['UPLOAD_FOLDER'], filename)):
        return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # A previous run leaves its stop event set; give the new run a
        # fresh, unset one.
        if hasattr(rte, 'stop_event'):
            rte.stop_event = threading.Event()

        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        # Publish the thread while still holding the lock so concurrent
        # start requests cannot launch a second recorder.
        recording_thread = threading.Thread(
            target=_run_recording_with_patched_worker,
            args=(dev_index,),
            daemon=True,
        )
        recording_thread.start()

    return jsonify(success=True, message="Recording started", device_index=dev_index)


def _run_recording_with_patched_worker(dev_index):
    """Thread body: run ``rte.run_recording`` with the diarizing chunk worker installed."""
    try:
        orig_worker = rte.chunk_writer_and_transcribe_worker
        rte.chunk_writer_and_transcribe_worker = _diarized_chunk_worker
        try:
            rte.run_recording(mic_index=dev_index, sys_index=None,
                              chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
                              model_name=getattr(rte, 'MODEL_NAME', None),
                              no_transcribe=False)
        finally:
            # Always restore the module's own worker, even if recording fails.
            rte.chunk_writer_and_transcribe_worker = orig_worker
    except Exception as e:
        print("run_recording exception:", e)


def _diarized_chunk_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
    """Replacement chunk worker that writes speaker-labelled transcript lines.

    Takes ``(filename, frames)`` items from *in_queue* until the module stop
    event is set and the queue is empty. Each chunk is saved as a WAV file,
    its frames are appended to *final_frames_list*, and -- when
    ``transcriber.model`` is available -- every transcribed segment is
    appended to ``rte.TRANSCRIPT_FILE`` with the speaker of the first
    overlapping diarization segment ("Unknown" when none overlaps).
    *single_channel_label* is accepted for signature compatibility and unused.
    """
    while True:
        try:
            chunk_path, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            if rte.stop_event.is_set() and in_queue.empty():
                break
            continue

        rte.save_wav_from_frames(chunk_path, frames, nchannels=rte.CHANNELS)
        final_frames_list.extend(frames)
        diar_segments = rte.diarization_hook(str(chunk_path)) or []

        if not (transcriber and transcriber.model):
            continue
        try:
            segments, _info = transcriber.model.transcribe(
                str(chunk_path), beam_size=5)
            for seg in segments:
                _append_transcript_line(chunk_path, seg, diar_segments)
        except Exception as e:
            print(f"Transcription error for {chunk_path}: {e}")
    print("Patched worker exiting.")


def _append_transcript_line(chunk_path, seg, diar_segments):
    """Append one formatted, speaker-labelled line for *seg* to the transcript file."""
    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
    speaker = next(
        (d_speaker for d_start, d_end, d_speaker in diar_segments
         if seg_start < d_end and seg_end > d_start),
        "Unknown",
    )
    line = (f"[{pathlib.Path(chunk_path).name}] {seg_start:.2f}-{seg_end:.2f} "
            f"Speaker {speaker}: {seg_text}\n")
    # The file is opened and closed per line so each line reaches disk
    # promptly for whoever is tailing the transcript.
    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
        tf.write(line)
def stop_recording():
    """Ask the recorder to stop by setting ``rec_transcribe_extension.stop_event``.

    Always answers with a success JSON body; the call only signals the stop
    and does not wait for the recording thread to finish.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    with recording_lock:
        stop_signal = getattr(rte, 'stop_event', None)
        if stop_signal is not None:
            try:
                stop_signal.set()
            except Exception:
                # Best effort: a failure to signal is deliberately ignored.
                pass
    return jsonify(success=True, message="Stop signal sent")
def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file and yield its lines as SSE ``data:`` events.

    Args:
        path: Path of the transcript file to follow. It may not exist yet; a
            single "[info] ... Waiting..." event is sent while waiting for it.
        stop_cond_fn: Optional zero-argument callable. Once it returns a truthy
            value the remaining file content is sent and the stream ends.

    Yields:
        ``"data: <line>\\n\\n"`` for every non-blank line, followed by a final
        ``"data: [info] Transcription ended.\\n\\n"`` event.

    While tailing, a trailing line that has no newline yet is held back until
    it is complete, so a line caught mid-write is not split into two events.
    When the stop condition fires, everything that is left (including such a
    partial line) is flushed before the final event.
    """
    offset = 0
    sent_initial = False
    while True:
        stopping = bool(stop_cond_fn and stop_cond_fn())
        lines, new_offset, exists = _read_new_transcript_lines(
            path, offset, flush_partial=stopping)
        progressed = new_offset != offset
        offset = new_offset

        for ln in lines:
            yield f"data: {ln}\n\n"

        if stopping:
            # The read above was the final drain; nothing more will follow.
            break

        if exists:
            if progressed:
                sent_initial = True
            else:
                # no new lines
                time.sleep(0.25)
        else:
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)

    # final notification
    yield "data: [info] Transcription ended.\n\n"


def _read_new_transcript_lines(path, offset, flush_partial=False):
    """Read stripped, non-blank lines appended to *path* since byte *offset*.

    Returns ``(lines, new_offset, exists)``. A missing file yields
    ``([], 0, False)``. If the file is shorter than *offset* (truncated or
    recreated) reading restarts from the beginning. Unless *flush_partial* is
    true, bytes after the last newline are left for the next call.
    """
    try:
        with open(path, "rb") as fh:
            if fh.seek(0, os.SEEK_END) < offset:
                offset = 0
            fh.seek(offset)
            chunk = fh.read()
    except FileNotFoundError:
        return [], 0, False

    if not flush_partial:
        # b"\n" never occurs inside a multi-byte UTF-8 sequence, so cutting
        # here cannot split a character.
        chunk = chunk[:chunk.rfind(b"\n") + 1]

    text = chunk.decode("utf-8", errors="ignore")
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    stripped = (ln.strip() for ln in text.split("\n"))
    return [ln for ln in stripped if ln], offset + len(chunk), True
def events():
    """Stream transcript lines to the client as Server-Sent Events.

    The lines come from ``rec_transcribe_extension.TRANSCRIPT_FILE``; a plain
    HTTP 500 response is returned when the module defines no such path. The
    stream ends once the module's stop event is set and no recording thread
    is alive.
    """
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
    if not transcript_path:
        return Response("No transcript file configured", status=500)

    def stop_fn():
        # A stop must have been requested ...
        event = getattr(rte, 'stop_event', None)
        try:
            stop_requested = event is not None and event.is_set()
        except Exception:
            stop_requested = False
        if not stop_requested:
            return False
        # ... and the recording thread must be gone (or never started).
        worker = recording_thread
        return worker is None or not worker.is_alive()

    stream = tail_transcript_file(str(transcript_path), stop_cond_fn=stop_fn)
    return Response(stream_with_context(stream), mimetype="text/event-stream")
def status():
    """Report as JSON (``running``) whether the recording thread is alive."""
    # NOTE(review): no @app.route decorator is visible for this view in this
    # excerpt -- confirm it is registered with the app.
    worker = recording_thread
    is_running = bool(worker and worker.is_alive())
    return jsonify(running=is_running)
if __name__ == "__main__":
    # Development entry point. host="0.0.0.0" listens on every network
    # interface (not only localhost), on port 7860; threaded=True lets the
    # server handle other requests while an SSE stream is open.
    app.run(host="0.0.0.0", port=7860, threaded=True)