AudioTransDiar / app2.py
prthm11's picture
Upload 12 files
4207399 verified
# app.py
"""
Flask app to:
1) serve the provided upload template,
2) accept audio uploads (.mp3/.wav/.m4a/.aac/.ogg) and show an audio player,
3) start/stop recording from a system loopback device when the audio element plays/pauses,
4) stream live transcription back to the browser via Server-Sent Events (SSE).
Notes:
- Uses rec_transcribe_extension.run_recording to capture audio and (optionally) transcribe chunks.
- Transcription streaming is implemented by tailing rec_transcribe_extension.TRANSCRIPT_FILE.
- This app assumes it runs on the same machine that has access to the local audio devices.
"""
import os
import pathlib
import queue
import threading
import time

from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
from werkzeug.utils import secure_filename

# import your recorder/transcriber helper (uploaded by you)
import rec_transcribe_extension as rte
# --- Upload configuration ---------------------------------------------------
# Directory (relative to the current working directory) that receives uploads.
# It is created at import time so the request handlers can rely on it.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Lower-case file suffixes (dot included) accepted by the upload endpoint.
ALLOWED_EXT = {
    ".mp3",
    ".wav",
    ".m4a",
    ".aac",
    ".ogg",
}

# --- Flask application ------------------------------------------------------
# static_folder=None: no static folder is configured for this app.
app = Flask(__name__, static_folder=None)
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

# --- Recording state --------------------------------------------------------
# Handle of the background recording thread (None while none exists) and the
# lock the /start and /stop handlers hold while touching recording state.
recording_thread = None
recording_lock = threading.Lock()
def allowed_file(filename):
    """Return True when *filename* carries one of the ``ALLOWED_EXT`` suffixes.

    Only the final suffix is considered, and it is lower-cased before the
    membership test, so the check is case-insensitive.
    """
    suffix = pathlib.Path(filename).suffix
    return suffix.lower() in ALLOWED_EXT
def find_system_loopback_index():
    """Return the index of a likely loopback / system-audio input device.

    Heuristic: the first device that has at least one input channel and whose
    lower-cased name contains one of the keywords below ('loop', 'stereo',
    'mix', 'what u hear', 'virtual', 'audio cable', 'loopback', 'monitor')
    wins. If no device matches, the default input device is used.

    Returns:
        The integer device index, or ``None`` when PyAudio cannot be imported
        or initialised, or when no default input device can be resolved.
    """
    try:
        import pyaudio
        pa = pyaudio.PyAudio()
    except Exception:
        # Importing pyaudio or constructing PyAudio() failed: no audio backend.
        return None

    keywords = ("loop", "stereo", "mix", "what u hear",
                "virtual", "audio cable", "loopback", "monitor")
    try:
        for i in range(pa.get_device_count()):
            try:
                dev = pa.get_device_info_by_index(i)
                name = (dev.get("name") or "").lower()
                if dev.get("maxInputChannels", 0) <= 0:
                    # Output-only device: cannot be recorded from.
                    continue
                if any(kw in name for kw in keywords):
                    return int(dev["index"])
            except Exception:
                # A device that cannot be queried is skipped, not fatal.
                continue

        # Fallback: default input device.
        try:
            return int(pa.get_default_input_device_info().get("index"))
        except Exception:
            return None
    finally:
        # Single release point: runs on every return path and also when
        # device enumeration itself raises.
        pa.terminate()
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page rendered from the upload template."""
    template_name = "index2_upload.html"
    return render_template(template_name)
@app.route("/upload", methods=["POST"])
def upload():
    """Store an uploaded audio file and return its URL as JSON.

    Expects a multipart form field named ``file``. Responds with HTTP 400 and
    ``success=False`` when the field is missing, the filename is empty, or the
    sanitised filename does not pass ``allowed_file``. On success the file is
    saved under the upload folder with a millisecond-timestamp prefix and the
    response carries ``url`` and ``filename``.
    """
    def reject(reason):
        # Every validation failure has the same shape: JSON error + 400.
        return jsonify(success=False, error=reason), 400

    if 'file' not in request.files:
        return reject("No file part")

    uploaded = request.files['file']
    if uploaded.filename == '':
        return reject("Empty filename")

    safe_name = secure_filename(uploaded.filename)
    if not allowed_file(safe_name):
        return reject("Extension not allowed")

    # Prefix with the current time in milliseconds to avoid name collisions.
    millis = int(time.time() * 1000)
    stored_name = f"{millis}_{safe_name}"
    uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], stored_name))

    return jsonify(success=True, url=f"/uploads/{stored_name}", filename=stored_name)
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Serve *filename* from the upload folder inline (not as an attachment)."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)
def _diarized_transcribe_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
    """Chunk worker that writes speaker-labelled transcript lines.

    Installed in place of ``rte.chunk_writer_and_transcribe_worker`` while a
    recording runs. The signature mirrors the one used by the original nested
    worker; ``single_channel_label`` is accepted but unused here.
    NOTE(review): presumably ``rte.run_recording`` calls the worker with this
    signature -- confirm against rec_transcribe_extension.

    For every ``(filename, frames)`` item taken from *in_queue* it:
      1. saves the frames as a WAV file via ``rte.save_wav_from_frames``,
      2. appends the frames to *final_frames_list*,
      3. runs ``rte.diarization_hook`` on the chunk,
      4. transcribes the chunk (when a model is loaded) and appends one line
         per segment to ``rte.TRANSCRIPT_FILE``, labelled with the first
         diarization segment that overlaps it (``Unknown`` if none does).

    The loop ends once ``rte.stop_event`` is set and the queue is empty.
    """
    while True:
        try:
            filename, frames = in_queue.get(timeout=1.0)
        except queue.Empty:
            # Only exit on an idle queue after a stop was requested, so
            # chunks queued before the stop are still processed.
            if rte.stop_event.is_set() and in_queue.empty():
                break
            continue

        rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
        final_frames_list.extend(frames)
        diar_segments = rte.diarization_hook(str(filename)) or []

        if transcriber and transcriber.model:
            try:
                segments, _info = transcriber.model.transcribe(
                    str(filename), beam_size=5)
                for seg in segments:
                    seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
                    speaker = "Unknown"
                    for d_start, d_end, d_speaker in diar_segments:
                        # Half-open interval overlap test.
                        if (seg_start < d_end) and (seg_end > d_start):
                            speaker = d_speaker
                            break
                    # Write formatted diarization line
                    line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
                    # Opened per segment so each line reaches the file (and
                    # the SSE tailer) as soon as it is produced.
                    with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                        tf.write(line)
            except Exception as e:
                # A failed chunk must not kill the worker; report and go on.
                print(f"Transcription error for {filename}: {e}")
    print("Patched worker exiting.")


@app.route("/start", methods=["POST"])
def start_recording():
    """Start the background recording/transcription thread.

    Request body (JSON, optional): ``{"filename": "<uploaded file name>"}``.
    When a filename is given it must exist in the upload folder; this is only
    a sanity check that the user uploaded something -- the server never plays
    the file itself.

    Responses:
        200 ``success=True``  -- recording started, or one is already running.
        400 ``success=False`` -- the named upload does not exist.
        500 ``success=False`` -- no usable audio input device was found.

    The thread runs ``rte.run_recording`` on the device chosen by
    ``find_system_loopback_index`` with the module's chunk worker temporarily
    replaced by ``_diarized_transcribe_worker``.
    """
    global recording_thread

    body = request.get_json(force=True, silent=True) or {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (e.g. a list) carries no filename.
        body = {}
    filename = body.get('filename')

    if filename:
        known = isinstance(filename, str) and os.path.exists(
            os.path.join(app.config['UPLOAD_FOLDER'], filename))
        if not known:
            return jsonify(success=False, error="Uploaded file not found on server"), 400

    with recording_lock:
        # An active recording makes this call a no-op.
        if recording_thread and recording_thread.is_alive():
            return jsonify(success=True, message="Recording already running")

        # Fresh event so a stop signal from a previous session is not seen
        # as already set by the new recording.
        if hasattr(rte, 'stop_event'):
            rte.stop_event = threading.Event()

        # choose device: prefer loopback, else default input
        dev_index = find_system_loopback_index()
        if dev_index is None:
            return jsonify(success=False, error="No suitable audio input device found on server"), 500

        def target():
            try:
                orig_worker = rte.chunk_writer_and_transcribe_worker
                rte.chunk_writer_and_transcribe_worker = _diarized_transcribe_worker
                try:
                    rte.run_recording(mic_index=dev_index, sys_index=None,
                                      chunk_secs=getattr(
                                          rte, 'CHUNK_DURATION_SECS', 3),
                                      model_name=getattr(rte, 'MODEL_NAME', None),
                                      no_transcribe=False)
                finally:
                    # Always restore the module's own worker.
                    rte.chunk_writer_and_transcribe_worker = orig_worker
            except Exception as e:
                print("run_recording exception:", e)

        # Daemon thread: a still-running recording must not block shutdown.
        recording_thread = threading.Thread(target=target, daemon=True)
        recording_thread.start()

    return jsonify(success=True, message="Recording started")
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the recorder to stop by setting ``rte.stop_event``.

    Best-effort: a missing or ``None`` event is ignored, and any error raised
    while setting it is swallowed. The response is always
    ``success=True`` with the message "Stop signal sent".
    """
    with recording_lock:
        stop_flag = getattr(rte, 'stop_event', None)
        if stop_flag is not None:
            try:
                stop_flag.set()
            except Exception:
                pass
    return jsonify(success=True, message="Stop signal sent")
def tail_transcript_file(path, stop_cond_fn=None):
    """Tail the transcript file at *path*, yielding Server-Sent-Event messages.

    Each non-blank line of the file (whitespace-stripped, decoded as UTF-8
    with undecodable bytes dropped) is yielded as ``"data: <line>\\n\\n"``.
    Reading starts at the beginning of the file.

    Args:
        path: Transcript file to follow. It does not have to exist yet; a
            single "[info] ... Waiting..." message is yielded while it is
            missing and nothing has been streamed so far.
        stop_cond_fn: Optional zero-argument callable. Once it returns a
            truthy value the file is read one last time and the stream ends.

    Yields:
        SSE ``data:`` strings, finishing with an "[info] Transcription ended."
        message.
    """
    offset = 0  # byte offset of the first unread byte
    sent_initial = False
    while True:
        # Sample the stop condition *before* reading: whatever was written
        # before it turned true is then still drained by the read below.
        stopping = bool(stop_cond_fn and stop_cond_fn())

        try:
            # Binary mode gives real byte offsets that can be compared with
            # the file size. The handle is closed again before yielding.
            with open(path, "rb") as fh:
                if os.fstat(fh.fileno()).st_size < offset:
                    # File was truncated or recreated: start over.
                    offset = 0
                fh.seek(offset)
                chunk = fh.read()
                offset = fh.tell()
        except FileNotFoundError:
            if stopping:
                break
            if not sent_initial:
                yield "data: [info] Transcript file not yet created. Waiting...\n\n"
                sent_initial = True
            time.sleep(0.5)
            continue

        if chunk:
            sent_initial = True
            for raw_line in chunk.splitlines():
                text = raw_line.decode("utf-8", errors="ignore").strip()
                if text:
                    yield f"data: {text}\n\n"

        if stopping:
            break
        if not chunk:
            # no new lines
            time.sleep(0.25)

    # final notification
    yield "data: [info] Transcription ended.\n\n"
@app.route("/events")
def events():
    """Stream new transcript lines to the browser as Server-Sent Events.

    The file followed is ``rte.TRANSCRIPT_FILE``; when that attribute is
    missing or falsy a plain 500 response is returned instead. The stream
    ends once ``rte.stop_event`` is set and the recording thread is no longer
    alive.
    """
    configured = getattr(rte, "TRANSCRIPT_FILE", None)
    if not configured:
        return Response("No transcript file configured", status=500)
    transcript_path = str(configured)

    def stop_fn():
        # Stop was requested only if the module exposes a set stop_event;
        # any error while probing it counts as "not requested".
        try:
            stop_requested = (hasattr(rte, 'stop_event')
                              and rte.stop_event is not None
                              and rte.stop_event.is_set())
        except Exception:
            stop_requested = False

        # A thread that was never created counts as not alive.
        if recording_thread is not None:
            thread_alive = recording_thread.is_alive()
        else:
            thread_alive = False

        # End the stream only when both hold: stop requested, thread gone.
        return stop_requested and not thread_alive

    line_stream = tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)
    return Response(stream_with_context(line_stream),
                    mimetype="text/event-stream")
@app.route("/status")
def status():
    """Report as JSON (``running``) whether the recording thread is alive."""
    worker = recording_thread
    is_running = bool(worker and worker.is_alive())
    return jsonify(running=is_running)
if __name__ == "__main__":
    # Development entry point. host="0.0.0.0" binds to all network
    # interfaces (not just localhost) on port 7860; threaded=True is passed
    # so requests are handled concurrently with the long-lived /events stream.
    server_options = {"host": "0.0.0.0", "port": 7860, "threaded": True}
    app.run(**server_options)