Spaces:
Sleeping
Sleeping
File size: 12,260 Bytes
4207399 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | from flask import Flask, request, jsonify, send_from_directory, render_template
import threading
import time
import os
import queue
from pathlib import Path
import pyaudio
from werkzeug.utils import secure_filename
from rec_transcribe_extension import Transcriber, diarization_hook
from rec_transcribe_extension import (
list_input_devices,
run_recording,
OUTPUT_DIR,
CHUNKS_DIR,
FINAL_WAV,)
app = Flask(__name__)
recording_thread = None
recording_running = False
recording_status = {
"recording": False,
"live_segments": []
}
# ------ Device Listing API ------
@app.route("/api/devices", methods=["GET"])
def api_devices():
pa = pyaudio.PyAudio()
devices = []
for i in range(pa.get_device_count()):
dev = pa.get_device_info_by_index(i)
if dev.get("maxInputChannels", 0) > 0:
devices.append({"index": dev["index"], "name": dev["name"]})
pa.terminate()
return jsonify({"devices": devices})
# --- Start recording ---
@app.route("/api/start-recording", methods=["POST"])
def api_start_recording():
global recording_thread, stop_event, recording_status
data = request.json
# Validate required fields
try:
mic = int(data.get("mic"))
except Exception:
return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
# sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None
sys = None
if data.get("sys") not in (None, "", "null"):
try:
sys = int(data.get("sys"))
except Exception:
return jsonify({"error": "Invalid 'sys' parameter"}), 400
chunk_secs = int(data.get("chunk_secs", 5))
model = data.get("model", "medium")
no_transcribe = bool(data.get("no_transcribe", False))
if recording_status["recording"]:
return jsonify({"error": "Already recording"}), 400
# --- Validate that requested devices exist and have input channels ---
try:
pa = pyaudio.PyAudio()
except Exception as e:
return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
def device_is_valid(device_index):
try:
dev = pa.get_device_info_by_index(device_index)
return dev.get("maxInputChannels", 0) > 0
except Exception:
return False
if not device_is_valid(mic):
pa.terminate()
return jsonify({"error": f"Microphone device index {mic} not found or has no input channels"}), 400
if sys is not None and not device_is_valid(sys):
pa.terminate()
return jsonify({"error": f"System device index {sys} not found or has no input channels"}), 400
pa.terminate()
# Reset state
recording_status["recording"] = True
recording_status["live_segments"] = []
stop_event = threading.Event()
def run():
# Patch: update live_segments after each chunk
from rec_transcribe_extension import chunk_writer_and_transcribe_worker
# Monkey-patch chunk_writer_and_transcribe_worker to update live_segments
import rec_transcribe_extension as rte
orig_worker = rte.chunk_writer_and_transcribe_worker
def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
while True:
try:
filename, frames = in_queue.get(timeout=1.0)
except queue.Empty:
if stop_event.is_set() and in_queue.empty():
break
continue
rte.save_wav_from_frames(
filename, frames, nchannels=rte.CHANNELS)
final_frames_list.extend(frames)
diar = rte.diarization_hook(str(filename))
diar_segments = diar if diar else []
# Transcribe chunk and get segments with timestamps
if transcriber and transcriber.model:
try:
segments, info = transcriber.model.transcribe(
str(filename), beam_size=5)
for seg in segments:
seg_start = seg.start
seg_end = seg.end
seg_text = seg.text.strip()
speaker = "Unknown"
for d_start, d_end, d_speaker in diar_segments:
if (seg_start < d_end) and (seg_end > d_start):
speaker = d_speaker
break
# Update live_segments for frontend
recording_status["live_segments"].append({
"start": float(seg_start),
"end": float(seg_end),
"speaker": str(speaker),
"text": seg_text
})
# Write to transcript file as before
line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
tf.write(line)
except Exception as e:
print(f"Transcription error for {filename.name}: {e}")
print("Chunk writer/transcriber worker exiting.")
rte.chunk_writer_and_transcribe_worker = patched_worker
try:
rte.stop_event = stop_event
run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
model_name=model, no_transcribe=no_transcribe)
finally:
rte.chunk_writer_and_transcribe_worker = orig_worker
recording_status["recording"] = False
recording_thread = threading.Thread(target=run, daemon=True)
recording_thread.start()
return jsonify({"ok": True})
# --- Stop recording ---
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
global stop_event
if stop_event:
stop_event.set()
return jsonify({"ok": True})
# --- Poll status ---
@app.route("/api/recording-status")
def api_recording_status():
return jsonify(recording_status)
# # serve saved uploads at /uploads/<filename>
# @app.route('/uploads/<path:filename>')
# def serve_uploaded(filename):
# return send_from_directory(str(OUTPUT_DIR), filename)
# # --- upload pre-recorded files ---
# @app.route("/api/upload", methods=["POST"])
# def api_upload_file():
# """
# Accept a single file (form-data 'file'), save it into OUTPUT_DIR and return json
# { ok: True, filename: "<saved_name>", url: "/static/<saved_name>" }.
# """
# if 'file' not in request.files:
# return jsonify({"error": "No file provided"}), 400
# f = request.files['file']
# if f.filename == '':
# return jsonify({"error": "Empty filename"}), 400
# safe_name = secure_filename(f.filename)
# # prefix timestamp to avoid collisions
# ts = int(time.time() * 1000)
# saved_name = f"{ts}_{safe_name}"
# saved_path = OUTPUT_DIR / saved_name
# try:
# f.save(str(saved_path))
# except Exception as e:
# return jsonify({"error": f"Failed to save file: {e}"}), 500
# return jsonify({"ok": True, "filename": saved_name, "url": f"/static/{saved_name}"})
# # --- Start server-side paced transcription for a saved WAV/MP3 file ---
# @app.route("/api/start-transcribe-file", methods=["POST"])
# def api_start_transcribe_file():
# """
# POST JSON { filename: "<saved_name>" }
# Spawns a background thread that transcribes the file using the Transcriber,
# and appends transcribed segments (with start/end/speaker/text) into
# recording_status["live_segments"] while setting recording_status["recording"]=True.
# The worker will pace segments to approximate 'live' streaming using seg.start timestamps.
# """
# global recording_status
# data = request.json or {}
# filename = data.get("filename")
# print("DEBUG: /api/start-transcribe-file called with:", filename, flush=True)
# if not filename:
# return jsonify({"error": "Missing 'filename'"}), 400
# file_path = OUTPUT_DIR / filename
# if not file_path.exists():
# return jsonify({"error": "File not found on server"}), 404
# # prevent concurrent transcription runs
# if recording_status.get("recording"):
# return jsonify({"error": "Another transcription/recording is already running"}), 400
# def worker():
# try:
# recording_status["recording"] = True
# recording_status["live_segments"] = []
# transcriber = Transcriber()
# if not transcriber.model:
# # model not loaded/available
# recording_status["recording"] = False
# print("Transcription model not available; cannot transcribe file.")
# return
# # perform diarization if available
# diar_segments = diarization_hook(str(file_path)) or []
# # get segments from model
# try:
# segments, info = transcriber.model.transcribe(str(file_path), beam_size=5)
# except Exception as e:
# print("Error during transcription:", e)
# recording_status["recording"] = False
# return
# # Stream the segments into recording_status with timing
# start_clock = time.time()
# for seg in segments:
# # seg.start is seconds into the audio
# wait_for = seg.start - (time.time() - start_clock)
# if wait_for > 0:
# time.sleep(wait_for)
# # map speaker using diarization segments (best-effort overlap)
# speaker = "Unknown"
# for d_start, d_end, d_label in diar_segments:
# if (seg.start < d_end) and (seg.end > d_start):
# speaker = d_label
# break
# seg_obj = {
# "start": float(seg.start),
# "end": float(seg.end),
# "speaker": str(speaker),
# "text": seg.text.strip()
# }
# # append to shared status for frontend polling
# recording_status.setdefault("live_segments", []).append(seg_obj)
# # also append to transcript file for persistence (optional)
# with open(rec_transcribe_extension.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
# line = f"[{filename}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
# tf.write(line)
# # done streaming
# recording_status["recording"] = False
# except Exception as e:
# print("Error in transcription worker:", e)
# recording_status["recording"] = False
# t = threading.Thread(target=worker, daemon=True)
# t.start()
# return jsonify({"ok": True})
# --- List final files ---
@app.route("/api/final-files")
def api_final_files():
files = []
out_dir = OUTPUT_DIR
for fname in os.listdir(out_dir):
if fname.endswith(".wav") or fname.endswith(".txt"):
files.append(
{"name": fname, "path": f"/static/{fname}", "url": f"/static/{fname}"})
return jsonify({"files": files})
# --- Serve static files (WAV, TXT) ---
@app.route('/static/<path:filename>')
def static_files(filename):
return send_from_directory(OUTPUT_DIR, filename)
# --- Serve the frontend ---
@app.route("/")
def index():
return render_template("index2.html")
if __name__ == "__main__":
app.run(port=5000, debug=True)
|