prthm11 committed on
Commit
0ba7c0e
·
verified ·
1 Parent(s): 2a26bdf

Update merged.py

Browse files
Files changed (1) hide show
  1. merged.py +273 -395
merged.py CHANGED
@@ -1,82 +1,125 @@
1
- # main.py
2
  import os
3
  import time
4
  import threading
5
  import queue
6
  import pathlib
7
- import pyaudio
8
  from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
9
  from werkzeug.utils import secure_filename
10
 
11
- # your helper module
12
- import rec_transcribe_extension as rte
13
- from rec_transcribe_extension import Transcriber, diarization_hook, run_recording, OUTPUT_DIR
14
-
15
- app = Flask(__name__)
16
- UPLOAD_FOLDER = "/app/uploads"
17
- # os.makedirs(UPLOAD_FOLDER, exist_ok=True)
18
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}
21
 
22
-
23
  def allowed_file(filename: str) -> bool:
24
- """Check if file extension is allowed"""
25
  ext = pathlib.Path(filename).suffix.lower()
26
  return ext in ALLOWED_EXT
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # ---------------- Shared state ----------------
30
  recording_thread = None
31
- recording_running = False
32
  recording_lock = threading.Lock()
 
33
 
34
- recording_status = {
35
- "recording": False,
36
- "live_segments": []
37
- }
38
-
39
- # ---------------- Landing + Frontend ----------------
40
  @app.route("/")
41
  def landing():
42
  return render_template("landing.html")
43
 
44
-
45
  @app.route("/live")
46
  def live_page():
47
  return render_template("index2.html")
48
 
49
-
50
  @app.route("/upload")
51
  def upload_page():
52
  return render_template("index2_upload.html")
53
 
54
- # ---------------- Device listing ----------------
55
-
56
-
57
  @app.route("/api/devices", methods=["GET"])
58
  def api_devices():
59
- pa = pyaudio.PyAudio()
60
- devices = []
61
- for i in range(pa.get_device_count()):
62
- dev = pa.get_device_info_by_index(i)
63
- if dev.get("maxInputChannels", 0) > 0:
64
- devices.append({"index": dev["index"], "name": dev["name"]})
65
- pa.terminate()
66
- return jsonify({"devices": devices})
 
 
 
 
 
67
 
68
- # --- Start recording ---
69
  @app.route("/api/start-recording", methods=["POST"])
70
  def api_start_recording():
71
- global recording_thread, stop_event, recording_status
72
- data = request.json
73
- # Validate required fields
 
 
74
  try:
75
  mic = int(data.get("mic"))
76
  except Exception:
77
  return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
78
 
79
- # sys = int(data["sys"]) if data.get("sys") not in (None, "", "null") else None
80
  sys = None
81
  if data.get("sys") not in (None, "", "null"):
82
  try:
@@ -87,12 +130,13 @@ def api_start_recording():
87
  chunk_secs = int(data.get("chunk_secs", 5))
88
  model = data.get("model", "medium")
89
  no_transcribe = bool(data.get("no_transcribe", False))
 
90
  if recording_status["recording"]:
91
  return jsonify({"error": "Already recording"}), 400
92
 
93
- # --- Validate that requested devices exist and have input channels ---
94
  try:
95
- pa = pyaudio.PyAudio()
96
  except Exception as e:
97
  return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
98
 
@@ -113,137 +157,104 @@ def api_start_recording():
113
 
114
  pa.terminate()
115
 
116
- # Reset state
117
  recording_status["recording"] = True
118
  recording_status["live_segments"] = []
119
  stop_event = threading.Event()
120
 
121
  def run():
122
- # Patch: update live_segments after each chunk
123
- from rec_transcribe_extension import chunk_writer_and_transcribe_worker
124
-
125
- # Monkey-patch chunk_writer_and_transcribe_worker to update live_segments
126
- import rec_transcribe_extension as rte
127
- orig_worker = rte.chunk_writer_and_transcribe_worker
128
-
129
- def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
130
- while True:
131
- try:
132
- filename, frames = in_queue.get(timeout=1.0)
133
- except queue.Empty:
134
- if stop_event.is_set() and in_queue.empty():
135
- break
136
- continue
137
-
138
- rte.save_wav_from_frames(
139
- filename, frames, nchannels=rte.CHANNELS)
140
- final_frames_list.extend(frames)
141
-
142
- diar = rte.diarization_hook(str(filename))
143
- diar_segments = diar if diar else []
144
-
145
- # Transcribe chunk and get segments with timestamps
146
- if transcriber and transcriber.model:
147
  try:
148
- segments, info = transcriber.model.transcribe(
149
- str(filename), beam_size=5)
150
- for seg in segments:
151
- seg_start = seg.start
152
- seg_end = seg.end
153
- seg_text = seg.text.strip()
154
- speaker = "Unknown"
155
- for d_start, d_end, d_speaker in diar_segments:
156
- if (seg_start < d_end) and (seg_end > d_start):
157
- speaker = d_speaker
158
- break
159
- # Update live_segments for frontend
160
- recording_status["live_segments"].append({
161
- "start": float(seg_start),
162
- "end": float(seg_end),
163
- "speaker": str(speaker),
164
- "text": seg_text
165
- })
166
- # Write to transcript file as before
167
- line = f"[{filename.name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
168
- with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
169
- tf.write(line)
170
- except Exception as e:
171
- print(f"Transcription error for {filename.name}: {e}")
172
- print("Chunk writer/transcriber worker exiting.")
173
-
174
- rte.chunk_writer_and_transcribe_worker = patched_worker
175
- try:
176
- rte.stop_event = stop_event
177
- run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
178
- model_name=model, no_transcribe=no_transcribe)
179
- finally:
180
- rte.chunk_writer_and_transcribe_worker = orig_worker
181
- recording_status["recording"] = False
182
 
183
- recording_thread = threading.Thread(target=run, daemon=True)
184
- recording_thread.start()
185
- return jsonify({"ok": True})
 
 
186
 
187
- # # ---------------- Recording APIs ----------------
188
- # @app.route("/api/start-recording", methods=["POST"])
189
- # def api_start_recording():
190
- # global recording_thread, recording_status
191
- # data = request.json or {}
192
-
193
- # mic = int(data.get("mic", -1))
194
- # sys = data.get("sys")
195
- # if sys in (None, "", "null"):
196
- # sys = None
197
- # else:
198
- # sys = int(sys)
199
-
200
- # chunk_secs = int(data.get("chunk_secs", 5))
201
- # model = data.get("model", "medium")
202
- # no_transcribe = bool(data.get("no_transcribe", False))
203
-
204
- # if recording_status["recording"]:
205
- # return jsonify({"error": "Already recording"}), 400
206
-
207
- # # validate devices
208
- # pa = pyaudio.PyAudio()
209
- # def valid(dev_idx):
210
- # try:
211
- # dev = pa.get_device_info_by_index(dev_idx)
212
- # return dev.get("maxInputChannels", 0) > 0
213
- # except Exception:
214
- # return False
215
- # if not valid(mic):
216
- # pa.terminate()
217
- # return jsonify({"error": f"Mic device {mic} invalid"}), 400
218
- # if sys is not None and not valid(sys):
219
- # pa.terminate()
220
- # return jsonify({"error": f"System device {sys} invalid"}), 400
221
- # pa.terminate()
222
-
223
- # # reset state
224
- # recording_status["recording"] = True
225
- # recording_status["live_segments"] = []
226
- # rte.stop_event = threading.Event()
227
-
228
- # def run():
229
- # try:
230
- # run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
231
- # model_name=model, no_transcribe=no_transcribe)
232
- # finally:
233
- # recording_status["recording"] = False
234
-
235
- # recording_thread = threading.Thread(target=run, daemon=True)
236
- # recording_thread.start()
237
- # return jsonify({"ok": True})
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
 
 
 
 
 
 
 
 
240
  @app.route("/api/stop-recording", methods=["POST"])
241
  def api_stop_recording():
242
- if hasattr(rte, "stop_event") and rte.stop_event:
243
- rte.stop_event.set()
 
 
 
244
  return jsonify({"ok": True})
245
 
246
-
247
  @app.route("/api/recording-status")
248
  def api_recording_status():
249
  return jsonify({
@@ -251,9 +262,7 @@ def api_recording_status():
251
  "live_segments": recording_status.get("live_segments", [])
252
  })
253
 
254
- # ---------------- Upload-based APIs ----------------
255
-
256
-
257
  @app.route("/api/upload", methods=["POST"])
258
  def api_upload_file():
259
  if 'file' not in request.files:
@@ -264,231 +273,86 @@ def api_upload_file():
264
  filename = secure_filename(f.filename)
265
  if not allowed_file(filename):
266
  return jsonify(success=False, error="Extension not allowed"), 400
267
-
268
- # avoid collisions by prefixing timestamp
269
  ts = int(time.time() * 1000)
270
- filename = f"{ts}_{filename}"
271
- save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
272
- f.save(save_path)
273
- url = f"/uploads/{filename}"
274
- return jsonify(success=True, url=url, filename=filename)
275
-
276
- # ---------------- File serving ----------------
277
-
278
 
 
279
  @app.route("/uploads/<path:filename>")
280
  def uploaded_file(filename):
281
  return send_from_directory(app.config['UPLOAD_FOLDER'], filename, as_attachment=False)
282
 
283
- # @app.route("/api/start-transcribe-file", methods=["POST"])
284
- # def api_start_transcribe_file():
285
- # data = request.json or {}
286
- # filename = data.get("filename")
287
- # file_path = OUTPUT_DIR / filename
288
- # if not file_path.exists():
289
- # return jsonify({"error": "File not found"}), 404
290
-
291
- # if recording_status.get("recording"):
292
- # return jsonify({"error": "Busy"}), 400
293
-
294
- # def worker():
295
- # try:
296
- # recording_status["recording"] = True
297
- # recording_status["live_segments"] = []
298
- # transcriber = Transcriber()
299
- # diar_segments = diarization_hook(str(file_path)) or []
300
- # segments, _ = transcriber.model.transcribe(str(file_path), beam_size=5)
301
- # start_clock = time.time()
302
- # for seg in segments:
303
- # wait_for = seg.start - (time.time() - start_clock)
304
- # if wait_for > 0:
305
- # time.sleep(wait_for)
306
- # speaker = "Unknown"
307
- # for d_start, d_end, d_label in diar_segments:
308
- # if (seg.start < d_end) and (seg.end > d_start):
309
- # speaker = d_label
310
- # break
311
-
312
- # seg_obj = {
313
- # "start": float(seg.start),
314
- # "end": float(seg.end),
315
- # "speaker": speaker,
316
- # "text": seg.text.strip()
317
- # }
318
- # recording_status["live_segments"].append(seg_obj)
319
-
320
- # # --- NEW: also append to transcript file so /events SSE can stream it ---
321
- # line = f"{seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {seg.text.strip()}\n"
322
- # with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
323
- # tf.write(line)
324
-
325
- # recording_status["recording"] = False
326
- # except Exception as e:
327
- # print("Error in file transcription:", e)
328
- # recording_status["recording"] = False
329
-
330
- # threading.Thread(target=worker, daemon=True).start()
331
- # return jsonify({"ok": True})
332
-
333
- def find_system_loopback_index():
334
- """
335
- Try to find a likely loopback / system audio input device.
336
- Heuristics: look for device names that contain 'loop', 'stereo', 'mix', 'what u hear',
337
- 'virtual', 'audio cable'. Otherwise fallback to default input device.
338
- """
339
- pa = None
340
- try:
341
- import pyaudio
342
- pa = pyaudio.PyAudio()
343
- except Exception:
344
- return None
345
-
346
- keywords = ["loop", "stereo", "mix", "what u hear", "virtual", "audio cable", "loopback", "monitor"]
347
- best_idx = None
348
- for i in range(pa.get_device_count()):
349
- try:
350
- dev = pa.get_device_info_by_index(i)
351
- name = (dev.get("name") or "").lower()
352
- max_in = dev.get("maxInputChannels", 0)
353
- if max_in <= 0:
354
- continue
355
- for kw in keywords:
356
- if kw in name:
357
- best_idx = int(dev["index"])
358
- pa.terminate()
359
- return best_idx
360
- except Exception:
361
- continue
362
-
363
- try:
364
- default_info = pa.get_default_input_device_info()
365
- idx = int(default_info.get("index"))
366
- pa.terminate()
367
- return idx
368
- except Exception:
369
- if pa:
370
- pa.terminate()
371
- return None
372
-
373
  @app.route("/api/start-transcribe-file", methods=["POST"])
374
  def api_start_transcribe_file():
375
- """
376
- Start a background thread which calls rec_transcribe_extension.run_recording(...)
377
- We try to detect a loopback device; if not found we pick the default input device.
378
- """
379
- global recording_thread
380
- body = request.get_json(force=True, silent=True) or {}
381
- filename = body.get('filename')
382
-
383
- # Basic check: uploaded file exists (we don't actually play the file on the server,
384
- # but it's a sanity check so user didn't start without uploading)
385
- if filename:
386
- if not os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
387
- return jsonify(success=False, error="Uploaded file not found on server"), 400
388
-
389
- with recording_lock:
390
- # if there's an active recording, return ok
391
- if recording_thread and recording_thread.is_alive():
392
- return jsonify(success=True, message="Recording already running")
393
- # clear any previous stop_event
394
  try:
395
- if hasattr(rte, 'stop_event'):
396
- rte.stop_event = threading.Event() # new event the run_recording will wait on
397
- except Exception:
398
- pass
399
-
400
- # choose device: prefer loopback
401
- dev_index = find_system_loopback_index()
402
- if dev_index is None:
403
- return jsonify(success=False, error="No suitable audio input device found on server"), 500
404
-
405
- # Start the recording in a background thread
406
- def target():
407
- try:
408
- from rec_transcribe_extension import chunk_writer_and_transcribe_worker
409
- import rec_transcribe_extension as rte
410
- orig_worker = rte.chunk_writer_and_transcribe_worker
411
-
412
- def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
413
- while True:
414
- try:
415
- filename, frames = in_queue.get(timeout=1.0)
416
- except queue.Empty:
417
- if rte.stop_event.is_set() and in_queue.empty():
418
- break
419
- continue
420
-
421
- rte.save_wav_from_frames(filename, frames, nchannels=rte.CHANNELS)
422
- final_frames_list.extend(frames)
423
-
424
- diar_segments = rte.diarization_hook(str(filename)) or []
425
-
426
- if transcriber and transcriber.model:
427
- try:
428
- segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
429
- for seg in segments:
430
- seg_start, seg_end, seg_text = seg.start, seg.end, seg.text.strip()
431
- speaker = "Unknown"
432
- for d_start, d_end, d_speaker in diar_segments:
433
- if (seg_start < d_end) and (seg_end > d_start):
434
- speaker = d_speaker
435
- break
436
- # Write diarized transcript line
437
- line = f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n"
438
- with open(rte.TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
439
- tf.write(line)
440
- except Exception as e:
441
- print(f"Transcription error for {filename}: {e}")
442
-
443
- print("Patched worker exiting.")
444
-
445
- # Apply patch
446
- rte.chunk_writer_and_transcribe_worker = patched_worker
447
- try:
448
- rte.run_recording(
449
- mic_index=dev_index,
450
- sys_index=None,
451
- chunk_secs=getattr(rte, 'CHUNK_DURATION_SECS', 3),
452
- model_name=getattr(rte, 'MODEL_NAME', None),
453
- no_transcribe=False
454
- )
455
- finally:
456
- rte.chunk_writer_and_transcribe_worker = orig_worker
457
-
458
- except Exception as e:
459
- print("run_recording exception:", e)
460
-
461
-
462
- recording_thread = threading.Thread(target=target, daemon=True)
463
- recording_thread.start()
464
- return jsonify(success=True, message="Recording started", device_index=dev_index)
465
 
466
- # @app.route("/static/<path:filename>")
467
- # def static_files(filename):
468
- # return send_from_directory(OUTPUT_DIR, filename)
469
 
 
470
  @app.route("/stop", methods=["POST"])
471
  def stop_recording():
472
- """
473
- Signal the rec_transcribe_extension stop_event to stop gracefully.
474
- """
475
- global recording_thread
476
- with recording_lock:
477
- # set the stop_event in module
478
- if hasattr(rte, 'stop_event') and rte.stop_event is not None:
479
- try:
480
- rte.stop_event.set()
481
- except Exception:
482
- pass
483
  return jsonify(success=True, message="Stop signal sent")
484
 
485
-
486
  def tail_transcript_file(path, stop_cond_fn=None):
487
- """
488
- Generator that tails the transcript file and yields SSE data lines.
489
- If file doesn't exist yet, yield a short status message then keep waiting.
490
- stop_cond_fn is a callable that when returns True will break.
491
- """
492
  last_pos = 0
493
  sent_initial = False
494
  while True:
@@ -506,52 +370,66 @@ def tail_transcript_file(path, stop_cond_fn=None):
506
  last_pos = fh.tell()
507
  sent_initial = True
508
  else:
509
- # no new lines
510
  time.sleep(0.25)
511
  else:
512
  if not sent_initial:
513
  yield "data: [info] Transcript file not yet created. Waiting...\n\n"
514
  sent_initial = True
515
  time.sleep(0.5)
516
- # final notification
517
  yield "data: [info] Transcription ended.\n\n"
518
- # ---------------- SSE events (from app2) ----------------
519
-
520
 
521
  @app.route("/events")
522
  def events():
523
- """
524
- SSE endpoint that streams new transcript lines from rec_transcribe_extension.TRANSCRIPT_FILE.
525
- The stream ends when the module stop_event is set and the background recording thread finishes.
526
- """
527
- transcript_path = getattr(rte, "TRANSCRIPT_FILE", None)
528
- if not transcript_path:
529
- return Response("No transcript file configured", status=500)
530
- transcript_path = str(transcript_path)
531
-
532
  def stop_fn():
533
- # stop when the recording thread is no longer alive AND the module stop_event is set
534
  cond = False
535
  try:
536
- cond = (hasattr(rte, 'stop_event')
537
- and rte.stop_event is not None and rte.stop_event.is_set())
538
  except Exception:
539
  cond = False
540
- # also stop if thread finished
541
- t_alive = recording_thread.is_alive() if recording_thread is not None else False
542
- # If stop requested and thread not alive -> end stream
 
 
543
  return (cond and not t_alive)
544
-
545
- return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)),
546
- mimetype="text/event-stream")
547
 
548
  @app.route("/status")
549
  def status():
550
  running = False
551
- if recording_thread and recording_thread.is_alive():
552
- running = True
 
 
553
  return jsonify(running=running)
554
 
555
- # ---------------- Run ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
556
  if __name__ == "__main__":
557
- app.run(host="0.0.0.0", port=7860, debug=True)
 
1
+ # merged.py (production-ready for Docker / Hugging Face Spaces)
2
  import os
3
  import time
4
  import threading
5
  import queue
6
  import pathlib
7
+ from pathlib import Path
8
  from flask import Flask, request, jsonify, send_from_directory, Response, stream_with_context, render_template
9
  from werkzeug.utils import secure_filename
10
 
11
+ # Try to import rec_transcribe_extension; we still rely on its utilities
12
+ try:
13
+ import rec_transcribe_extension as rte
14
+ from rec_transcribe_extension import Transcriber, diarization_hook, run_recording
15
+ except Exception as e:
16
+ # If the module import fails, keep rte=None and catch later to provide friendly error messages
17
+ rte = None
18
+ Transcriber = None
19
+ diarization_hook = None
20
+ run_recording = None
21
+ print("Warning: failed to import rec_transcribe_extension:", e)
22
+
23
# ---- Environment-driven directories & config ----
def _ensure_dir(preferred, fallback):
    """Create *preferred* (including parents) and return it.

    If the directory cannot be created (for example a read-only location in
    a container runtime), a warning is printed and *fallback* is created and
    returned instead. A failure to create *fallback* propagates.
    """
    try:
        preferred.mkdir(parents=True, exist_ok=True)
    except (OSError, ValueError) as exc:
        print(f"Warning: cannot create {preferred} ({exc}); using {fallback} instead")
        fallback.mkdir(parents=True, exist_ok=True)
        return fallback
    return preferred


# Output directory: taken from $OUTPUT_DIR, with a /tmp fallback.
DEFAULT_OUTPUT = os.environ.get("OUTPUT_DIR", "/app/output_transcript_diarization")
OUTPUT_DIR = _ensure_dir(Path(DEFAULT_OUTPUT), Path("/tmp/output_transcript_diarization"))

# Transcript file path used by the SSE endpoint.
TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"

# Upload directory for web uploads: taken from $UPLOAD_FOLDER, with a /tmp fallback.
UPLOAD_FOLDER = _ensure_dir(
    Path(os.environ.get("UPLOAD_FOLDER", "/app/uploads")),
    Path("/tmp/uploads"),
)

# File extensions (lower-case, with the leading dot) accepted by allowed_file().
ALLOWED_EXT = {'.mp3', '.wav', '.m4a', '.aac', '.ogg'}


def allowed_file(filename: str) -> bool:
    """Return True when *filename*'s final extension, lower-cased, is in ALLOWED_EXT."""
    return Path(filename).suffix.lower() in ALLOWED_EXT
49
 
50
# ---- Try to import pyaudio lazily and detect if host audio devices are accessible ----
def _probe_pyaudio():
    """Import pyaudio and check for at least one input-capable device.

    Returns a ``(module, supported)`` pair:
      * ``(None, False)`` when pyaudio cannot be imported;
      * ``(module, False)`` when it imports but PyAudio cannot be initialised
        or the devices cannot be enumerated;
      * ``(module, bool)`` otherwise, the flag telling whether any device
        reports ``maxInputChannels > 0``.
    """
    try:
        import importlib
        module = importlib.import_module("pyaudio")
    except Exception:
        # pyaudio not available
        return None, False

    try:
        pa = module.PyAudio()
        try:
            has_input = any(
                pa.get_device_info_by_index(i).get("maxInputChannels", 0) > 0
                for i in range(pa.get_device_count())
            )
        finally:
            # Release the handle even when enumeration fails part-way through.
            pa.terminate()
    except Exception as e:
        print("PyAudio imported but couldn't initialize audio devices:", e)
        return module, False
    return module, bool(has_input)


_pyaudio, LIVE_RECORDING_SUPPORTED = _probe_pyaudio()
70
+
71
# ---- Flask app ----
# static_folder=None is passed to Flask; this file registers its own
# /static/<path> route further down.
app = Flask(__name__, static_folder=None)
app.config.update(UPLOAD_FOLDER=str(UPLOAD_FOLDER))

# ---- Shared state ----
recording_lock = threading.Lock()
# Most recently started live-recording thread, or None.
recording_thread = None
# Busy flag plus the transcript segments collected so far.
recording_status = dict(recording=False, live_segments=[])
79
 
80
# ---- Frontend routes ----
@app.route("/")
def landing():
    """Render the landing page template."""
    template_name = "landing.html"
    return render_template(template_name)


@app.route("/live")
def live_page():
    """Render the live-recording page template."""
    template_name = "index2.html"
    return render_template(template_name)


@app.route("/upload")
def upload_page():
    """Render the file-upload page template."""
    template_name = "index2_upload.html"
    return render_template(template_name)
92
 
93
# ---- Device listing (only if supported) ----
@app.route("/api/devices", methods=["GET"])
def api_devices():
    """List audio devices that expose at least one input channel.

    Responses:
      * 200 ``{"devices": [], "error": ...}`` when live recording is not
        supported in this environment;
      * 200 ``{"devices": [{"index": ..., "name": ...}, ...]}`` on success;
      * 500 ``{"devices": [], "error": ...}`` when PyAudio raises.
    """
    if not LIVE_RECORDING_SUPPORTED:
        return jsonify({"devices": [], "error": "Live recording not supported in this environment."}), 200
    try:
        pa = _pyaudio.PyAudio()
        try:
            infos = (pa.get_device_info_by_index(i) for i in range(pa.get_device_count()))
            devices = [
                {"index": dev["index"], "name": dev["name"]}
                for dev in infos
                if dev.get("maxInputChannels", 0) > 0
            ]
        finally:
            # Always release the PyAudio handle, including when enumeration raises.
            pa.terminate()
        return jsonify({"devices": devices})
    except Exception as e:
        return jsonify({"devices": [], "error": str(e)}), 500
109
 
110
+ # ---- Start recording endpoint (guards if pyaudio unavailable) ----
111
  @app.route("/api/start-recording", methods=["POST"])
112
  def api_start_recording():
113
+ global recording_thread
114
+ if not LIVE_RECORDING_SUPPORTED or _pyaudio is None:
115
+ return jsonify({"error": "Live recording is not supported in this environment."}), 400
116
+
117
+ data = request.json or {}
118
  try:
119
  mic = int(data.get("mic"))
120
  except Exception:
121
  return jsonify({"error": "Missing or invalid 'mic' parameter"}), 400
122
 
 
123
  sys = None
124
  if data.get("sys") not in (None, "", "null"):
125
  try:
 
130
  chunk_secs = int(data.get("chunk_secs", 5))
131
  model = data.get("model", "medium")
132
  no_transcribe = bool(data.get("no_transcribe", False))
133
+
134
  if recording_status["recording"]:
135
  return jsonify({"error": "Already recording"}), 400
136
 
137
+ # validate devices using pyaudio
138
  try:
139
+ pa = _pyaudio.PyAudio()
140
  except Exception as e:
141
  return jsonify({"error": f"PyAudio initialization failed: {e}"}), 500
142
 
 
157
 
158
  pa.terminate()
159
 
160
+ # ready recording state
161
  recording_status["recording"] = True
162
  recording_status["live_segments"] = []
163
  stop_event = threading.Event()
164
 
165
  def run():
166
+ # monkey-patch worker if module supports it
167
+ if rte and hasattr(rte, "chunk_writer_and_transcribe_worker"):
168
+ import rec_transcribe_extension as rte_local
169
+ orig_worker = rte_local.chunk_writer_and_transcribe_worker
170
+
171
+ def patched_worker(in_queue, final_frames_list, transcriber, single_channel_label="mic"):
172
+ while True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  try:
174
+ filename, frames = in_queue.get(timeout=1.0)
175
+ except queue.Empty:
176
+ if stop_event.is_set() and in_queue.empty():
177
+ break
178
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ try:
181
+ rte_local.save_wav_from_frames(filename, frames, nchannels=rte_local.CHANNELS)
182
+ except Exception:
183
+ # best-effort; continue
184
+ pass
185
 
186
+ # diarization and transcription
187
+ diar_segments = []
188
+ try:
189
+ diar_segments = (rte_local.diarization_hook(str(filename)) or [])
190
+ except Exception:
191
+ diar_segments = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
+ if transcriber and getattr(transcriber, "model", None):
194
+ try:
195
+ segments, info = transcriber.model.transcribe(str(filename), beam_size=5)
196
+ for seg in segments:
197
+ seg_start = float(getattr(seg, "start", 0.0))
198
+ seg_end = float(getattr(seg, "end", 0.0))
199
+ seg_text = getattr(seg, "text", "").strip()
200
+ speaker = "Unknown"
201
+ for d_start, d_end, d_speaker in diar_segments:
202
+ if (seg_start < d_end) and (seg_end > d_start):
203
+ speaker = d_speaker
204
+ break
205
+ recording_status["live_segments"].append({
206
+ "start": seg_start,
207
+ "end": seg_end,
208
+ "speaker": str(speaker),
209
+ "text": seg_text
210
+ })
211
+ # write to persistent transcript file
212
+ try:
213
+ with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
214
+ tf.write(f"[{pathlib.Path(filename).name}] {seg_start:.2f}-{seg_end:.2f} Speaker {speaker}: {seg_text}\n")
215
+ except Exception:
216
+ pass
217
+ except Exception as e:
218
+ print("Transcription error:", e)
219
+ # patched worker exit
220
+
221
+ rte_local.chunk_writer_and_transcribe_worker = patched_worker
222
+ try:
223
+ rte_local.stop_event = stop_event
224
+ rte_local.run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
225
+ model_name=model, no_transcribe=no_transcribe)
226
+ finally:
227
+ rte_local.chunk_writer_and_transcribe_worker = orig_worker
228
+ else:
229
+ # fallback: call run_recording if available without monkey patch
230
+ try:
231
+ if rte and hasattr(rte, "stop_event"):
232
+ rte.stop_event = stop_event
233
+ if rte and hasattr(rte, "run_recording"):
234
+ rte.run_recording(mic_index=mic, sys_index=sys, chunk_secs=chunk_secs,
235
+ model_name=model, no_transcribe=no_transcribe)
236
+ except Exception as e:
237
+ print("run_recording error:", e)
238
+ recording_status["recording"] = False
239
 
240
+ recording_thread_local = threading.Thread(target=run, daemon=True)
241
+ recording_thread_local.start()
242
+ # store reference globally so stop logic can use it
243
+ global recording_thread
244
+ recording_thread = recording_thread_local
245
+ return jsonify({"ok": True})
246
+
247
# Stop recording
@app.route("/api/stop-recording", methods=["POST"])
def api_stop_recording():
    """Set the extension module's stop event when one is present, then acknowledge."""
    event = getattr(rte, "stop_event", None) if rte else None
    if event:
        try:
            event.set()
        except Exception:
            pass
    return jsonify({"ok": True})
256
 
257
+ # recording status
258
  @app.route("/api/recording-status")
259
  def api_recording_status():
260
  return jsonify({
 
262
  "live_segments": recording_status.get("live_segments", [])
263
  })
264
 
265
+ # ---- Upload endpoint (works in Spaces) ----
 
 
266
  @app.route("/api/upload", methods=["POST"])
267
  def api_upload_file():
268
  if 'file' not in request.files:
 
273
  filename = secure_filename(f.filename)
274
  if not allowed_file(filename):
275
  return jsonify(success=False, error="Extension not allowed"), 400
 
 
276
  ts = int(time.time() * 1000)
277
+ saved_name = f"{ts}_{filename}"
278
+ save_path = Path(app.config['UPLOAD_FOLDER']) / saved_name
279
+ try:
280
+ f.save(str(save_path))
281
+ except Exception as e:
282
+ return jsonify(success=False, error=f"Failed to save file: {e}"), 500
283
+ url = f"/uploads/{saved_name}"
284
+ return jsonify(success=True, url=url, filename=saved_name)
285
 
286
# Serve uploaded files
@app.route("/uploads/<path:filename>")
def uploaded_file(filename):
    """Send *filename* from the configured upload folder, not as an attachment."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename, as_attachment=False)
290
 
291
# ---- Transcribe an uploaded file in a paced 'live' manner (works in Spaces) ----
@app.route("/api/start-transcribe-file", methods=["POST"])
def api_start_transcribe_file():
    """Start a background transcription of a file that is already on the server.

    Expects a JSON body ``{"filename": "<name>"}``. The name is looked up in
    the upload folder first and in OUTPUT_DIR second. Segments are released
    at roughly their own timestamps, appended to
    ``recording_status["live_segments"]`` and written to TRANSCRIPT_FILE.

    Responses:
      * 200 ``{"ok": true}``   - worker thread started;
      * 400 ``{"error": ...}`` - missing/invalid filename, or already busy;
      * 404 ``{"error": ...}`` - file not found in either directory;
      * 503 ``{"error": ...}`` - transcription backend could not be imported.
    """
    # silent=True: a missing or malformed JSON body yields our own 400 below
    # instead of a framework-generated error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    filename = data.get("filename")
    if not filename:
        return jsonify({"error": "Missing filename"}), 400

    # The name comes from the client and is joined onto server directories:
    # accept only a bare file name so it cannot escape them.
    if (not isinstance(filename, str)
            or filename in (".", "..")
            or "/" in filename
            or "\\" in filename):
        return jsonify({"error": "Invalid filename"}), 400

    # Prefer the uploads folder; fall back to the output directory.
    uploaded_path = Path(app.config['UPLOAD_FOLDER']) / filename
    file_path = uploaded_path if uploaded_path.is_file() else OUTPUT_DIR / filename
    if not file_path.is_file():
        return jsonify({"error": "File not found"}), 404

    if Transcriber is None:
        # Without the backend the worker could do nothing; say so up front.
        return jsonify({"error": "Transcription backend is not available"}), 503

    # Check and claim the busy flag atomically so two near-simultaneous
    # requests cannot both start a worker.
    with recording_lock:
        if recording_status.get("recording"):
            return jsonify({"error": "Busy"}), 400
        recording_status["recording"] = True
        recording_status["live_segments"] = []

    def worker():
        try:
            transcriber = Transcriber()
            diar_segments = (diarization_hook(str(file_path)) if diarization_hook else None) or []
            model = getattr(transcriber, "model", None)
            if not model:
                print("Error in file transcription: transcriber has no model")
                return
            segments, _ = model.transcribe(str(file_path), beam_size=5)
            start_clock = time.time()
            for seg in segments:
                # Pace the output: hold each segment until its start time has
                # elapsed on the wall clock.
                wait_for = seg.start - (time.time() - start_clock)
                if wait_for > 0:
                    time.sleep(wait_for)
                # First diarization span overlapping the segment names the speaker.
                speaker = "Unknown"
                for d_start, d_end, d_label in diar_segments:
                    if (seg.start < d_end) and (seg.end > d_start):
                        speaker = d_label
                        break
                text = seg.text.strip()
                recording_status["live_segments"].append({
                    "start": float(seg.start),
                    "end": float(seg.end),
                    "speaker": speaker,
                    "text": text,
                })
                # Append to the transcript file for SSE streaming (best effort).
                try:
                    with open(TRANSCRIPT_FILE, "a", encoding="utf-8") as tf:
                        tf.write(f"[{file_path.name}] {seg.start:.2f}-{seg.end:.2f} Speaker {speaker}: {text}\n")
                except OSError as e:
                    print("Could not append to transcript file:", e)
        except Exception as e:
            print("Error in file transcription:", e)
        finally:
            # Always release the busy flag, whichever way the worker ends.
            recording_status["recording"] = False

    try:
        threading.Thread(target=worker, daemon=True).start()
    except Exception:
        # The worker never ran, so its finally-clause cannot clear the flag.
        recording_status["recording"] = False
        raise
    return jsonify({"ok": True})
 
343
 
344
# Stop (generic)
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Set the extension module's stop event when it exists, then report that the signal was sent."""
    stop_signal = getattr(rte, 'stop_event', None) if rte else None
    if stop_signal is not None:
        try:
            stop_signal.set()
        except Exception:
            pass
    return jsonify(success=True, message="Stop signal sent")
353
 
354
+ # SSE tailer
355
  def tail_transcript_file(path, stop_cond_fn=None):
 
 
 
 
 
356
  last_pos = 0
357
  sent_initial = False
358
  while True:
 
370
  last_pos = fh.tell()
371
  sent_initial = True
372
  else:
 
373
  time.sleep(0.25)
374
  else:
375
  if not sent_initial:
376
  yield "data: [info] Transcript file not yet created. Waiting...\n\n"
377
  sent_initial = True
378
  time.sleep(0.5)
 
379
  yield "data: [info] Transcription ended.\n\n"
 
 
380
 
381
  @app.route("/events")
382
  def events():
383
+ transcript_path = str(TRANSCRIPT_FILE)
 
 
 
 
 
 
 
 
384
  def stop_fn():
 
385
  cond = False
386
  try:
387
+ cond = (rte and hasattr(rte, 'stop_event') and rte.stop_event is not None and rte.stop_event.is_set())
 
388
  except Exception:
389
  cond = False
390
+ t_alive = False
391
+ try:
392
+ t_alive = 'recording_thread' in globals() and recording_thread is not None and recording_thread.is_alive()
393
+ except Exception:
394
+ t_alive = False
395
  return (cond and not t_alive)
396
+ return Response(stream_with_context(tail_transcript_file(transcript_path, stop_cond_fn=stop_fn)), mimetype="text/event-stream")
 
 
397
 
398
  @app.route("/status")
399
  def status():
400
  running = False
401
+ try:
402
+ running = recording_status.get("recording", False)
403
+ except Exception:
404
+ running = False
405
  return jsonify(running=running)
406
 
407
# Final-files listing (for UI)
@app.route("/api/final-files")
def api_final_files():
    """List downloadable files from OUTPUT_DIR and from the upload folder.

    Returns ``{"files": [{"name", "path", "url"}, ...]}``. OUTPUT_DIR entries
    ending in .wav/.txt are linked under ``/static/``; upload-folder entries
    ending in .wav/.mp3/.txt are linked under ``/uploads/``. A directory that
    cannot be listed is reported on stdout and skipped, so the endpoint still
    answers with whatever could be read.
    """
    # (directory, URL prefix, accepted suffixes) for each listed location.
    sources = (
        (OUTPUT_DIR, "/static", (".wav", ".txt")),
        (app.config['UPLOAD_FOLDER'], "/uploads", (".wav", ".mp3", ".txt")),
    )
    files = []
    for directory, prefix, suffixes in sources:
        try:
            # Sorted so the listing order is stable between calls.
            names = sorted(os.listdir(directory))
        except OSError as e:
            print(f"Could not list {directory}: {e}")
            continue
        for fname in names:
            if fname.endswith(suffixes):
                link = f"{prefix}/{fname}"
                files.append({"name": fname, "path": link, "url": link})
    return jsonify({"files": files})
427
+
428
# Serve static final-files from OUTPUT_DIR (if you want to expose them at /static/<file>)
@app.route('/static/<path:filename>')
def static_files(filename):
    """Send *filename* from OUTPUT_DIR."""
    output_root = str(OUTPUT_DIR)
    return send_from_directory(output_root, filename)
432
+
433
# Run only when debugging locally; in production we use gunicorn
if __name__ == "__main__":
    # Port is read from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port, threaded=True)