Spaces:

prthm11
/

AudioTransDiar

Sleeping

App Files Files Community

AudioTransDiar / app2.py

prthm11

Upload 12 files

4207399 verified 6 months ago

raw

history blame contribute delete

11.4 kB

	# app.py
	"""
	Flask app to:
	1) serve the provided upload template,
	2) accept audio uploads (.mp3, .wav, .m4a, .aac, .ogg) and show an audio player,
	3) start/stop recording from a system loopback device when the audio element plays/pauses,
	4) stream live transcription back to the browser via Server-Sent Events (SSE).

	Notes:
	- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
	- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
	- This app assumes it runs on the same machine that has access to the local audio devices.
	"""
	import os
	import pathlib
	import queue
	import threading
	import time

	from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
	from werkzeug.utils import secure_filename

	# import your recorder/transcriber helper (uploaded by you)
	import rec_transcribe_extension as rte

	# Directory where uploaded audio files are stored (relative path); it is
	# created at import time if it does not exist yet.
	UPLOAD_FOLDER = "uploads"
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	# Lower-case file extensions that allowed_file() accepts.
	ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}

	# NOTE(review): static_folder=None presumably turns off Flask's built-in
	# static route; uploaded files are served by the /uploads/<filename> view.
	app = Flask(__name__, static_folder=None)
	app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

	# Globals for recording thread management
	# recording_thread: inspected by the /start, /events and /status views to
	# tell whether a background recording is currently alive.
	# recording_lock: held by /start and /stop while they touch recording state.
	recording_thread = None
	recording_lock = threading.Lock()


	def allowed_file(filename):
	    """Return True when the extension of *filename* is in ALLOWED_EXT.

	    The comparison is case-insensitive: the suffix is lower-cased first.
	    """
	    suffix = pathlib.Path(filename).suffix
	    return suffix.lower() in ALLOWED_EXT


	def find_system_loopback_index():
	    """
	    Try to find a likely loopback / system audio input device.

	    Heuristics: return the first device that has at least one input channel
	    and whose lower-cased name contains one of: 'loop', 'stereo', 'mix',
	    'what u hear', 'virtual', 'audio cable', 'loopback', 'monitor'.
	    Otherwise fall back to the default input device.

	    Returns:
	        The integer device index, or None when PyAudio cannot be imported or
	        initialised, or when no default input device can be determined.
	    """
	    try:
	        import pyaudio
	        pa = pyaudio.PyAudio()
	    except Exception:
	        # Best effort: no PyAudio (or it failed to initialise) means no device.
	        return None

	    keywords = ("loop", "stereo", "mix", "what u hear",
	                "virtual", "audio cable", "loopback", "monitor")
	    # A single finally guarantees the PyAudio instance is released on every
	    # path, including when enumerating the devices itself raises.
	    try:
	        for i in range(pa.get_device_count()):
	            try:
	                dev = pa.get_device_info_by_index(i)
	                name = (dev.get("name") or "").lower()
	                if dev.get("maxInputChannels", 0) <= 0:
	                    continue  # cannot record from this device
	                if any(kw in name for kw in keywords):
	                    return int(dev["index"])
	            except Exception:
	                # A device that cannot be queried is skipped, not fatal.
	                continue

	        # fallback: default input device
	        try:
	            return int(pa.get_default_input_device_info().get("index"))
	        except Exception:
	            return None
	    finally:
	        pa.terminate()


	@app.route("/", methods=["GET"])
	def index():
	    """Serve the upload page by rendering the index2_upload.html template."""
	    page = render_template("index2_upload.html")
	    return page


	@app.route("/upload", methods=["POST"])
	def upload():
	    """
	    Store an uploaded audio file and return its URL.

	    Expects a multipart form field named 'file'. The stored name is the
	    sanitised client filename prefixed with a millisecond timestamp.

	    Returns:
	        JSON {success, url, filename} on success, or a 400 JSON error when
	        the file part is missing, the filename is empty, or the extension is
	        not in ALLOWED_EXT.
	    """
	    if 'file' not in request.files:
	        return jsonify(success=False, error="No file part"), 400
	    f = request.files['file']
	    # The filename may be missing entirely, not just empty.
	    raw_name = f.filename or ""
	    if raw_name == "":
	        return jsonify(success=False, error="Empty filename"), 400

	    filename = secure_filename(raw_name)
	    if not allowed_file(filename):
	        # secure_filename() drops non-ASCII characters, so a name such as
	        # "音声.mp3" collapses to "mp3" and loses its extension. Decide on the
	        # client's real extension and fall back to a generic stem; the suffix
	        # is safe to reuse because it is a member of ALLOWED_EXT.
	        if not allowed_file(raw_name):
	            return jsonify(success=False, error="Extension not allowed"), 400
	        filename = "upload" + pathlib.Path(raw_name).suffix.lower()

	    # avoid collisions by prefixing timestamp
	    ts = int(time.time() * 1000)
	    filename = f"{ts}_{filename}"
	    save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
	    f.save(save_path)
	    url = f"/uploads/{filename}"
	    return jsonify(success=True, url=url, filename=filename)


	@app.route("/uploads/<path:filename>")
	def uploaded_file(filename):
	    """Send a previously uploaded file back inline (not as an attachment)."""
	    upload_dir = app.config['UPLOAD_FOLDER']
	    return send_from_directory(upload_dir, filename, as_attachment=False)


	def _speaker_for_segment(seg_start, seg_end, diar_segments):
	    """Return the speaker of the first diarization turn overlapping the segment, else "Unknown"."""
	    for d_start, d_end, d_speaker in diar_segments:
	        if (seg_start < d_end) and (seg_end > d_start):
	            return d_speaker
	    return "Unknown"


	def _diarizing_chunk_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
	    """
	    Save each queued chunk, diarize and transcribe it, and append the result to the transcript.

	    Takes (filename, frames) items from in_queue until rte.stop_event is set
	    and the queue is empty. For every chunk it writes a WAV file, extends
	    final_frames_list with the frames, and appends one transcript line per
	    transcribed segment to rte.TRANSCRIPT_FILE.

	    single_channel_label is unused; it is kept so the signature matches the
	    worker this function replaces.
	    NOTE(review): queue.task_done() is not called here; confirm that
	    run_recording never join()s this queue.
	    """
	    while True:
	        try:
	            filename, frames = in_queue.get(timeout=1.0)
	        except queue.Empty:
	            if rte.stop_event.is_set() and in_queue.empty():
	                break
	            continue

	        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
	        final_frames_list.extend(frames)

	        # Treated as an iterable of (start, end, speaker) tuples.
	        diar_segments = rte.diarization_hook(str(filename)) or []

	        if transcriber and transcriber.model:
	            try:
	                segments, info = transcriber.model.transcribe(
	                    str(filename), beam_size=5)
	                for seg in segments:
	                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
	                    speaker = _speaker_for_segment(seg_start, seg_end, diar_segments)
	                    # Write formatted diarization line
	                    line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
	                    # Re-opened per line so each line is flushed to disk
	                    # right away for the code tailing the transcript.
	                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
	                        tf.write(line)
	            except Exception as e:
	                # One failed chunk must not stop the remaining chunks.
	                print(f"Transcription error for {filename}: {e}")

	    print("Patched worker exiting.")


	def _record_with_diarizing_worker(dev_index):
	    """Run rte.run_recording on dev_index with the diarizing worker patched in, then restore the original."""
	    try:
	        orig_worker = rte.chunk_writer_and_transcribe_worker
	        # apply patch
	        rte.chunk_writer_and_transcribe_worker = _diarizing_chunk_worker
	        try:
	            rte.run_recording(mic_index=dev_index, sys_index=None,
	                              chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
	                              model_name=getattr(rte, 'MODEL_NAME', None),
	                              no_transcribe=False)
	        finally:
	            rte.chunk_writer_and_transcribe_worker = orig_worker
	    except Exception as e:
	        # Runs in a background thread: report the failure instead of raising.
	        print("run_recording exception:", e)


	@app.route("/start", methods=["POST"])
	def start_recording():
	    """
	    Start a background thread which calls rec_transcribe_extension.run_recording(...)
	    We try to detect a loopback device; if not found we pick the default input device.

	    The optional JSON body may carry 'filename'; when given, the file must
	    exist in the upload folder.

	    Returns:
	        JSON success when the recording was started or is already running,
	        400 when the named upload is missing, 500 when no input device is found.
	    """
	    global recording_thread
	    body = request.get_json(force=True, silent=True) or {}
	    filename = body.get('filename')

	    # Basic check: uploaded file exists (we don't actually play the file on the server,
	    # but it's a sanity check so user didn't start without uploading)
	    if filename:
	        if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
	            return jsonify(success=False, error="Uploaded file not found on server"), 400

	    with recording_lock:
	        # if there's an active recording, return ok
	        if recording_thread and recording_thread.is_alive():
	            return jsonify(success=True, message="Recording already running")

	        # clear any previous stop_event
	        if hasattr(rte, 'stop_event'):
	            rte.stop_event = threading.Event()  # new event the run_recording will wait on

	        # choose device: prefer loopback
	        dev_index = find_system_loopback_index()
	        if dev_index is None:
	            return jsonify(success=False, error="No suitable audio input device found on server"), 500

	        # Start the recording in a background thread. It is published in
	        # recording_thread while the lock is held, so /status and /events
	        # can observe it and a second /start cannot start another one.
	        recording_thread = threading.Thread(
	            target=_record_with_diarizing_worker,
	            args=(dev_index,),
	            daemon=True,
	        )
	        recording_thread.start()

	    return jsonify(success=True, message="Recording started", device_index=dev_index)


	@app.route("/stop", methods=["POST"])
	def stop_recording():
	    """
	    Ask the recorder to stop by setting rec_transcribe_extension.stop_event.

	    Always answers with a success JSON, whether or not an event was found.
	    """
	    with recording_lock:
	        stop_signal = getattr(rte, 'stop_event', None)
	        if stop_signal is not None:
	            try:
	                stop_signal.set()
	            except Exception:
	                # best effort: a failure to signal is deliberately ignored
	                pass
	        reply = jsonify(success=True, message="Stop signal sent")
	    return reply


	def tail_transcript_file(path, stop_cond_fn=None):
	    """
	    Generator that tails the transcript file and yields SSE data lines.

	    Each non-blank line of the file is yielded as "data: <line>" followed by
	    a blank line. If the file doesn't exist yet, a single status message is
	    yielded and the generator keeps waiting.

	    Args:
	        path: path of the transcript file to follow.
	        stop_cond_fn: optional callable; once it returns True the file is
	            read one last time, so lines written just before the stop are
	            still delivered, and then the stream ends.

	    Yields:
	        SSE-formatted strings, ending with a "[info] Transcription ended."
	        message.
	    """
	    offset = 0            # byte position up to which the file has been sent
	    sent_initial = False  # True once any data or the waiting notice was sent
	    while True:
	        stopping = bool(stop_cond_fn and stop_cond_fn())

	        # Read in binary so the offset is a plain byte count, and close the
	        # file before yielding so no handle stays open while a client is slow.
	        try:
	            with open(path, "rb") as fh:
	                if os.fstat(fh.fileno()).st_size < offset:
	                    # The file was truncated or recreated: start over.
	                    offset = 0
	                fh.seek(offset)
	                pending = fh.read()
	        except FileNotFoundError:
	            pending = None

	        if pending is None:
	            if stopping:
	                break
	            if not sent_initial:
	                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
	                sent_initial = True
	            time.sleep(0.5)
	            continue

	        if not stopping:
	            # Hold back a trailing half-written line until its newline arrives,
	            # so one transcript line is never split across two events.
	            pending = pending[:pending.rfind(b"\n") + 1]

	        if pending:
	            offset += len(pending)
	            sent_initial = True
	            text = pending.decode("utf-8", errors="ignore")
	            for raw_line in text.replace("\r\n", "\n").replace("\r", "\n").split("\n"):
	                ln = raw_line.strip()
	                if ln:
	                    yield f"data: {ln}\n\n"

	        if stopping:
	            break
	        if not pending:
	            # no new lines
	            time.sleep(0.25)

	    # final notification
	    yield "data: [info] Transcription ended.\n\n"


	@app.route("/events")
	def events():
	    """
	    SSE endpoint streaming new lines of rec_transcribe_extension.TRANSCRIPT_FILE.

	    Answers 500 when the module defines no transcript file. Otherwise the
	    stream runs until the module stop_event is set and the background
	    recording thread is no longer alive.
	    """
	    configured = getattr(rte, "TRANSCRIPT_FILE", None)
	    if not configured:
	        return Response("No transcript file configured", status=500)
	    transcript_path = str(configured)

	    def stop_fn():
	        # Has a stop been requested through the module-level event?
	        try:
	            stop_signal = getattr(rte, 'stop_event', None)
	            stop_requested = stop_signal is not None and stop_signal.is_set()
	        except Exception:
	            stop_requested = False
	        # Is the recording thread still running?
	        thread_running = recording_thread is not None and recording_thread.is_alive()
	        # End the stream only when both hold: stop requested, thread finished.
	        return stop_requested and not thread_running

	    stream = tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)
	    return Response(stream_with_context(stream), mimetype="text/event-stream")


	@app.route("/status")
	def status():
	    """Report as JSON whether the background recording thread is alive."""
	    is_running = bool(recording_thread and recording_thread.is_alive())
	    return jsonify(running=is_running)


	if __name__ == "__main__":
	    # Script entry point: listen on host 0.0.0.0, port 7860.
	    # NOTE(review): threaded=True is presumably needed so the long-lived
	    # /events stream does not block other requests - confirm.
	    app.run(
	        host="0.0.0.0",
	        port=7860,
	        threaded=True,
	    )