from flask import Flask, request, jsonify, send_from_directory, render_template import threading import time import os import queue from pathlib import Path import pyaudio from werkzeug.utils import secure_filename from rec_transcribe_extension import Transcriber, diarization_hook from rec_transcribe_extension import ( list_input_devices, run_recording, OUTPUT_DIR, CHUNKS_DIR, FINAL_WAV,) app = Flask(__name__) recording_thread = None recording_running = False recording_status = { "recording": False, "live_segments": [] } # ------ Device Listing API ------ @app.route("/api/devices", methods=["GET"]) def api_devices(): pa = pyaudio.PyAudio() devices = [] for i in range(pa.get_device_count()): dev = pa.get_device_info_by_index(i) if dev.get("maxInputChannels", 0) > 0: devices.append({"index": dev["index"], "name": dev["name"]}) pa.terminate() return jsonify({"devices": devices}) # --- Start recording --- @app.route("/api/start-recording", methods=["POST"]) def api_start_recording(): global recording_thread, stop_event, recording_status data = request.json # Validate required fields try: mic = int(data.get("mic")) except Exception: return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400 # sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None sys = None if data.get("sys") not in (None, "", "null"): try: sys = int(data.get("sys")) except Exception: return jsonify({"error": "Invalid 'sys' parameter"}), 400 chunk_secs = int(data.get("chunk_secs", 5)) model = data.get("model", "medium") no_transcribe = bool(data.get("no_transcribe", False)) if recording_status["recording"]: return jsonify({"error": "Already recording"}), 400 # --- Validate that requested devices exist and have input channels --- try: pa = pyaudio.PyAudio() except Exception as e: return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500 def device_is_valid(device_index): try: dev = pa.get_device_info_by_index(device_index) return dev.get("maxInputChannels", 0) > 0 except Exception: return False if not device_is_valid(mic): pa.terminate() return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400 if sys is not None and not device_is_valid(sys): pa.terminate() return jsonify({"error": f"System device index {sys} not found or has no input channels"}), 400 pa.terminate() # Reset state recording_status["recording"] = True recording_status["live_segments"] = [] stop_event = threading.Event() def run(): # Patch: update live_segments after each chunk from rec_transcribe_extension import chunk_writer_and_transcribe_worker # Monkey-patch chunk_writer_and_transcribe_worker to update live_segments import rec_transcribe_extension as rte orig_worker = rte.chunk_writer_and_transcribe_worker def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"): while True: try: filename, frames = in_queue.get(timeout=1.0) except queue.Empty: if stop_event.is_set() and in_queue.empty(): break continue rte.save_wav_from_frames( filename, frames, nchannels=rte.CHANNELS) final_frames_list.extend(frames) diar = rte.diarization_hook(str(filename)) diar_segments = diar if diar else [] # Transcribe chunk and get segments with timestamps if transcriber and transcriber.model: try: segments, info = transcriber.model.transcribe( str(filename), beam_size=5) for seg in segments: seg_start = seg.start seg_end = seg.end seg_text = seg.text.strip() speaker = "Unknown" for d_start, d_end, d_speaker in diar_segments: if (seg_start < d_end) and (seg_end > d_start): speaker = d_speaker break # Update live_segments for frontend recording_status["live_segments"].append({ "start": float(seg_start), "end": float(seg_end), "speaker": str(speaker), "text": seg_text }) # Write to transcript file as before line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n" with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf: tf.write(line) except Exception as e: print(f"Transcription error for {filename.name}: {e}") print("Chunk writer/transcriber worker exiting.") rte.chunk_writer_and_transcribe_worker = patched_worker try: rte.stop_event = stop_event run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs, model_name=model, no_transcribe=no_transcribe) finally: rte.chunk_writer_and_transcribe_worker = orig_worker recording_status["recording"] = False recording_thread = threading.Thread(target=run, daemon=True) recording_thread.start() return jsonify({"ok": True}) # --- Stop recording --- @app.route("/api/stop-recording", methods=["POST"]) def api_stop_recording(): global stop_event if stop_event: stop_event.set() return jsonify({"ok": True}) # --- Poll status --- @app.route("/api/recording-status") def api_recording_status(): return jsonify(recording_status) # # serve saved uploads at /uploads/ # @app.route('/uploads/') # def serve_uploaded(filename): # return send_from_directory(str(OUTPUT_DIR), filename) # # --- upload pre-recorded files --- # @app.route("/api/upload", methods=["POST"]) # def api_upload_file(): # """ # Accept a single file (form-data 'file'), save it into OUTPUT_DIR and return json # { ok: True, filename: "", url: "/static/" }. # """ # if 'file' not in request.files: # return jsonify({"error": "No file provided"}), 400 # f = request.files['file'] # if f.filename == '': # return jsonify({"error": "Empty filename"}), 400 # safe_name = secure_filename(f.filename) # # prefix timestamp to avoid collisions # ts = int(time.time() * 1000) # saved_name = f"{ts}_{safe_name}" # saved_path = OUTPUT_DIR / saved_name # try: # f.save(str(saved_path)) # except Exception as e: # return jsonify({"error": f"Failed to save file: {e}"}), 500 # return jsonify({"ok": True, "filename": saved_name, "url": f"/static/{saved_name}"}) # # --- Start server-side paced transcription for a saved WAV/MP3 file --- # @app.route("/api/start-transcribe-file", methods=["POST"]) # def api_start_transcribe_file(): # """ # POST JSON { filename: "" } # Spawns a background thread that transcribes the file using the Transcriber, # and appends transcribed segments (with start/end/speaker/text) into # recording_status["live_segments"] while setting recording_status["recording"]=True. # The worker will pace segments to approximate 'live' streaming using seg.start timestamps. # """ # global recording_status # data = request.json or {} # filename = data.get("filename") # print("DEBUG: /api/start-transcribe-file called with:", filename, flush=True) # if not filename: # return jsonify({"error": "Missing 'filename'"}), 400 # file_path = OUTPUT_DIR / filename # if not file_path.exists(): # return jsonify({"error": "File not found on server"}), 404 # # prevent concurrent transcription runs # if recording_status.get("recording"): # return jsonify({"error": "Another transcription/recording is already running"}), 400 # def worker(): # try: # recording_status["recording"] = True # recording_status["live_segments"] = [] # transcriber = Transcriber() # if not transcriber.model: # # model not loaded/available # recording_status["recording"] = False # print("Transcription model not available; cannot transcribe file.") # return # # perform diarization if available # diar_segments = diarization_hook(str(file_path)) or [] # # get segments from model # try: # segments, info = transcriber.model.transcribe(str(file_path), beam_size=5) # except Exception as e: # print("Error during transcription:", e) # recording_status["recording"] = False # return # # Stream the segments into recording_status with timing # start_clock = time.time() # for seg in segments: # # seg.start is seconds into the audio # wait_for = seg.start - (time.time() - start_clock) # if wait_for > 0: # time.sleep(wait_for) # # map speaker using diarization segments (best-effort overlap) # speaker = "Unknown" # for d_start, d_end, d_label in diar_segments: # if (seg.start < d_end) and (seg.end > d_start): # speaker = d_label # break # seg_obj = { # "start": float(seg.start), # "end": float(seg.end), # "speaker": str(speaker), # "text": seg.text.strip() # } # # append to shared status for frontend polling # recording_status.setdefault("live_segments", []).append(seg_obj) # # also append to transcript file for persistence (optional) # with open(rec_transcribe_extension.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf: # line = f"[{filename}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n" # tf.write(line) # # done streaming # recording_status["recording"] = False # except Exception as e: # print("Error in transcription worker:", e) # recording_status["recording"] = False # t = threading.Thread(target=worker, daemon=True) # t.start() # return jsonify({"ok": True}) # --- List final files --- @app.route("/api/final-files") def api_final_files(): files = [] out_dir = OUTPUT_DIR for fname in os.listdir(out_dir): if fname.endswith(".wav") or fname.endswith(".txt"): files.append( {"name": fname, "path": f"/static/{fname}", "url": f"/static/{fname}"}) return jsonify({"files": files}) # --- Serve static files (WAV, TXT) --- @app.route('/static/') def static_files(filename): return send_from_directory(OUTPUT_DIR, filename) # --- Serve the frontend --- @app.route("/") def index(): return render_template("index2.html") if __name__ == "__main__": app.run(port=5000, debug=True)