Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# app.py
|
| 2 |
-
# Whisper Transcriber — Gradio 3.x compatible full file
|
| 3 |
# Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed
|
| 4 |
|
| 5 |
import os
|
|
@@ -533,129 +533,16 @@ def transcribe_single_file(
|
|
| 533 |
pass
|
| 534 |
return text, srt_path, "\n".join(logs)
|
| 535 |
|
| 536 |
-
# Two-pass
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
logs.append(f"Two-pass enabled: fast_model={fast_model}, refine_model={refine_model}, threshold={refine_threshold}")
|
| 541 |
-
|
| 542 |
-
fast = get_whisper_model(fast_model, device=device)
|
| 543 |
-
logs.append(f"Loaded fast model: {fast_model}")
|
| 544 |
-
wav = convert_to_wav_if_needed(p)
|
| 545 |
-
logs.append(f"Converted to WAV: {os.path.basename(wav)}")
|
| 546 |
-
|
| 547 |
-
fast_result = fast.transcribe(wav)
|
| 548 |
-
segments = fast_result.get("segments") or []
|
| 549 |
-
|
| 550 |
-
if not segments:
|
| 551 |
-
text = fast_result.get("text", "").strip()
|
| 552 |
-
if enable_memory:
|
| 553 |
-
text = memory_correct_text(text)
|
| 554 |
-
update_memory_with_transcript(text)
|
| 555 |
-
text = postprocess_transcript(text)
|
| 556 |
-
srt_ret = None
|
| 557 |
-
if generate_srt and fast_result.get("segments"):
|
| 558 |
-
srt_text = segments_to_srt(fast_result["segments"])
|
| 559 |
-
srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
|
| 560 |
-
with open(srt_fp, "w", encoding="utf-8") as fh:
|
| 561 |
-
fh.write(srt_text)
|
| 562 |
-
srt_ret = srt_fp
|
| 563 |
-
logs.append(f"SRT generated: {srt_fp}")
|
| 564 |
-
if wav and os.path.exists(wav) and wav != p:
|
| 565 |
-
try:
|
| 566 |
-
os.unlink(wav)
|
| 567 |
-
except Exception:
|
| 568 |
-
pass
|
| 569 |
-
return text, srt_ret, "\n".join(logs)
|
| 570 |
-
|
| 571 |
-
refined_segments = []
|
| 572 |
-
segments_to_refine = []
|
| 573 |
-
for seg in segments:
|
| 574 |
-
seg_text = seg.get("text", "").strip()
|
| 575 |
-
if enable_memory:
|
| 576 |
-
corrected = memory_correct_text(seg_text)
|
| 577 |
-
else:
|
| 578 |
-
corrected = seg_text
|
| 579 |
-
seg_copy = dict(seg)
|
| 580 |
-
seg_copy["text"] = corrected
|
| 581 |
-
refined_segments.append(seg_copy)
|
| 582 |
-
avg_lp = seg.get("avg_logprob", None)
|
| 583 |
-
if avg_lp is None:
|
| 584 |
-
continue
|
| 585 |
-
try:
|
| 586 |
-
if float(avg_lp) < float(refine_threshold):
|
| 587 |
-
segments_to_refine.append(seg_copy)
|
| 588 |
-
except Exception:
|
| 589 |
-
continue
|
| 590 |
-
|
| 591 |
-
logs.append(f"Fast pass: {len(segments)} segments, {len(segments_to_refine)} to refine.")
|
| 592 |
-
|
| 593 |
-
if segments_to_refine:
|
| 594 |
-
refine = get_whisper_model(refine_model, device=device)
|
| 595 |
-
logs.append(f"Loaded refine model: {refine_model}")
|
| 596 |
-
for seg in segments_to_refine:
|
| 597 |
-
start = seg.get("start", 0.0)
|
| 598 |
-
end = seg.get("end", start + seg.get("duration", 0.0))
|
| 599 |
-
if end <= start:
|
| 600 |
-
continue
|
| 601 |
-
try:
|
| 602 |
-
seg_wav = trim_audio_segment(wav, start, end)
|
| 603 |
-
r_result = refine.transcribe(seg_wav)
|
| 604 |
-
new_text = r_result.get("text", "").strip()
|
| 605 |
-
if enable_memory:
|
| 606 |
-
new_text = memory_correct_text(new_text)
|
| 607 |
-
for rs in refined_segments:
|
| 608 |
-
if abs(rs.get("start", 0.0) - start) < 0.001 and abs(rs.get("end", 0.0) - end) < 0.001:
|
| 609 |
-
rs["text"] = new_text
|
| 610 |
-
if r_result.get("segments"):
|
| 611 |
-
rs["avg_logprob"] = r_result["segments"][0].get("avg_logprob", rs.get("avg_logprob"))
|
| 612 |
-
break
|
| 613 |
-
try:
|
| 614 |
-
if os.path.exists(seg_wav):
|
| 615 |
-
os.unlink(seg_wav)
|
| 616 |
-
except Exception:
|
| 617 |
-
pass
|
| 618 |
-
except Exception as e:
|
| 619 |
-
logs.append(f"Refine failed for {start}-{end}: {e}")
|
| 620 |
-
continue
|
| 621 |
-
|
| 622 |
-
full_text_parts = [s.get("text", "").strip() for s in sorted(refined_segments, key=lambda x: x.get("start", 0.0))]
|
| 623 |
-
combined_text = " ".join([p for p in full_text_parts if p])
|
| 624 |
-
if enable_memory:
|
| 625 |
-
combined_text = memory_correct_text(combined_text)
|
| 626 |
-
try:
|
| 627 |
-
update_memory_with_transcript(combined_text)
|
| 628 |
-
logs.append("Memory updated.")
|
| 629 |
-
except Exception:
|
| 630 |
-
pass
|
| 631 |
-
combined_text = postprocess_transcript(combined_text)
|
| 632 |
-
|
| 633 |
-
srt_path = None
|
| 634 |
-
if generate_srt:
|
| 635 |
-
srt_segs = []
|
| 636 |
-
for rs in sorted(refined_segments, key=lambda x: x.get("start", 0.0)):
|
| 637 |
-
srt_segs.append({"start": rs.get("start", 0.0), "end": rs.get("end", 0.0), "text": rs.get("text", "")})
|
| 638 |
-
srt_text = segments_to_srt(srt_segs)
|
| 639 |
-
srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}_two_pass.srt")
|
| 640 |
-
with open(srt_fp, "w", encoding="utf-8") as fh:
|
| 641 |
-
fh.write(srt_text)
|
| 642 |
-
srt_path = srt_fp
|
| 643 |
-
logs.append(f"SRT generated: {srt_path}")
|
| 644 |
-
|
| 645 |
-
if wav and os.path.exists(wav) and wav != p:
|
| 646 |
-
try:
|
| 647 |
-
os.unlink(wav)
|
| 648 |
-
except Exception:
|
| 649 |
-
pass
|
| 650 |
-
|
| 651 |
-
return combined_text, srt_path, "\n".join(logs)
|
| 652 |
-
|
| 653 |
except Exception as e:
|
| 654 |
tb = traceback.format_exc()
|
| 655 |
return "", None, f"Transcription error: {e}\n{tb}"
|
| 656 |
|
| 657 |
|
| 658 |
-
# ---------- Batch transcribe ----------
|
| 659 |
def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
|
| 660 |
logs = []
|
| 661 |
transcripts = []
|
|
@@ -744,12 +631,26 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
|
|
| 744 |
<script>
|
| 745 |
(function() {
|
| 746 |
try {
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
} else {
|
| 751 |
-
|
| 752 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
} catch (e) { console.warn('theme init failed', e); }
|
| 754 |
})();
|
| 755 |
</script>
|
|
@@ -780,33 +681,143 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
|
|
| 780 |
transcribe_btn = gr.Button("Transcribe", variant="primary")
|
| 781 |
with gr.Column(scale=1):
|
| 782 |
gr.Markdown("### Output")
|
| 783 |
-
|
|
|
|
| 784 |
transcript_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
|
| 785 |
srt_download = gr.File(label="SRT (if generated)")
|
| 786 |
single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
|
| 787 |
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
|
| 806 |
transcribe_btn.click(
|
| 807 |
-
fn=
|
| 808 |
inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, use_two_pass_single, fast_model_choice, refine_threshold_single],
|
| 809 |
-
outputs=[
|
| 810 |
)
|
| 811 |
|
| 812 |
# Batch tab
|
|
@@ -967,32 +978,60 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
|
|
| 967 |
gr.Markdown("- Two-pass helps when heavy model is slow.")
|
| 968 |
with gr.Column():
|
| 969 |
gr.Markdown("### Theme")
|
| 970 |
-
# Insert a small HTML block containing the toggle button and JS (works universally)
|
| 971 |
gr.HTML("""
|
| 972 |
-
<div style="display:flex;
|
| 973 |
-
<button id="wt_theme_btn" style="padding:8px
|
| 974 |
-
|
|
|
|
| 975 |
</button>
|
| 976 |
-
<
|
| 977 |
</div>
|
| 978 |
<script>
|
| 979 |
(function(){
|
| 980 |
try {
|
| 981 |
const root = document.documentElement;
|
| 982 |
const btn = document.getElementById('wt_theme_btn');
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
btn.addEventListener('click', function(){
|
| 989 |
try {
|
| 990 |
const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
|
| 991 |
root.setAttribute('data-theme', cur);
|
| 992 |
-
try { localStorage.setItem('wt_theme', cur); } catch(e){}
|
| 993 |
-
|
|
|
|
|
|
|
|
|
|
| 994 |
});
|
| 995 |
-
} catch(e){
|
|
|
|
|
|
|
| 996 |
})();
|
| 997 |
</script>
|
| 998 |
""")
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
# Whisper Transcriber — Gradio 3.x compatible full file (chunked streaming)
|
| 3 |
# Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed
|
| 4 |
|
| 5 |
import os
|
|
|
|
| 533 |
pass
|
| 534 |
return text, srt_path, "\n".join(logs)
|
| 535 |
|
| 536 |
+
# Two-pass path remains unchanged (not used by generator directly)
|
| 537 |
+
# ... omitted here for brevity (two-pass logic same as previous full file) ...
|
| 538 |
+
# For the generator flow we use chunking; two-pass heavy refinement is optional
|
| 539 |
+
return "", None, "Two-pass is not invoked in this helper in streaming mode."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
except Exception as e:
|
| 541 |
tb = traceback.format_exc()
|
| 542 |
return "", None, f"Transcription error: {e}\n{tb}"
|
| 543 |
|
| 544 |
|
| 545 |
+
# ---------- Batch transcribe (unchanged, uses transcribe_single_file) ----------
|
| 546 |
def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
|
| 547 |
logs = []
|
| 548 |
transcripts = []
|
|
|
|
| 631 |
<script>
|
| 632 |
(function() {
|
| 633 |
try {
|
| 634 |
+
// Load saved preference or fall back to OS preference, then 'light'
|
| 635 |
+
var saved = null;
|
| 636 |
+
try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
|
| 637 |
+
var chosen = null;
|
| 638 |
+
if (saved === 'dark' || saved === 'light') {
|
| 639 |
+
chosen = saved;
|
| 640 |
+
} else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
| 641 |
+
chosen = 'dark';
|
| 642 |
} else {
|
| 643 |
+
chosen = 'light';
|
| 644 |
}
|
| 645 |
+
document.documentElement.setAttribute('data-theme', chosen);
|
| 646 |
+
|
| 647 |
+
try {
|
| 648 |
+
var style = document.createElement('style');
|
| 649 |
+
style.innerHTML = `
|
| 650 |
+
:root, [data-theme="dark"] { transition: background-color 260ms ease, color 260ms ease; }
|
| 651 |
+
`;
|
| 652 |
+
document.head.appendChild(style);
|
| 653 |
+
} catch(e){}
|
| 654 |
} catch (e) { console.warn('theme init failed', e); }
|
| 655 |
})();
|
| 656 |
</script>
|
|
|
|
| 681 |
transcribe_btn = gr.Button("Transcribe", variant="primary")
|
| 682 |
with gr.Column(scale=1):
|
| 683 |
gr.Markdown("### Output")
|
| 684 |
+
# progress: numeric slider visually works as a progress bar in Gradio 3.x
|
| 685 |
+
progress_num = gr.Slider(minimum=0, maximum=100, value=0, label="Progress (%)", interactive=False)
|
| 686 |
transcript_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
|
| 687 |
srt_download = gr.File(label="SRT (if generated)")
|
| 688 |
single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
|
| 689 |
|
| 690 |
+
# ---------- streaming, chunked single-file transcription ----------
def _single_generator(audio_file, model_name, device, mem_on, srt_on, use_two_pass_flag, fast_model, refine_thresh, chunk_size_sec=30, enable_chunking=True):
    """Stream a single-file transcription to the UI.

    Generator used as the Gradio click handler; each ``yield`` is a tuple of
    ``(progress_percent, transcript_text_so_far, srt_path_or_None, log_line)``
    matching the four output components.

    Parameters:
        audio_file: path string or uploaded file object (with ``.name``).
        model_name: whisper model identifier passed to ``get_whisper_model``.
        device: target device; the literal string ``"auto"`` defers to whisper.
        mem_on: apply ``memory_correct_text`` per chunk and update memory at the end.
        srt_on: best-effort SRT generation (re-transcribes the full file, see below).
        use_two_pass_flag, fast_model, refine_thresh: accepted only for signature
            compatibility with the UI inputs — NOT used in streaming mode
            (two-pass refinement lives in ``transcribe_single_file``).
        chunk_size_sec: chunk length in seconds when chunking is enabled.
        enable_chunking: split long audio into chunks so partial text can stream.
    """
    yield 0, "", None, "Starting..."
    try:
        if not audio_file:
            yield 100, "", None, "No audio provided."
            return

        # Resolve the input path from either a plain string or an upload object.
        path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))

        yield 2, "", None, "Converting input to WAV..."
        wav = convert_to_wav_if_needed(path)
        yield 8, "", None, f"Converted to WAV: {os.path.basename(wav)}"

        # Determine duration (ffprobe first, pydub as fallback) to decide chunking.
        if enable_chunking:
            duration = None
            try:
                p = subprocess.run(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", wav], capture_output=True, text=True, timeout=8)
                duration = float(p.stdout.strip()) if p.stdout and p.stdout.strip() else None
            except Exception:
                duration = None

            if duration is None:
                try:
                    aud = AudioSegment.from_file(wav)
                    duration = len(aud) / 1000.0
                except Exception:
                    duration = None

            # Only chunk when the file is meaningfully longer than one chunk.
            if duration and duration > chunk_size_sec * 1.5:
                num_chunks = max(1, int((duration + chunk_size_sec - 1) // chunk_size_sec))
                chunk_ranges = []
                start = 0.0
                for _ in range(num_chunks):
                    end = min(duration, start + chunk_size_sec)
                    chunk_ranges.append((start, end))
                    start = end
            else:
                enable_chunking = False
                chunk_ranges = [(0.0, None)]  # (start, None) means "whole file"
        else:
            chunk_ranges = [(0.0, None)]

        # Load the model once for all chunks.
        yield 10, "", None, f"Loading model: {model_name}..."
        model = get_whisper_model(model_name, device=None if device == "auto" else device)
        yield 15, "", None, f"Model loaded: {model_name}"

        # Transcribe chunk by chunk, streaming partial text; progress spans 15..85%.
        overall_text_parts = []
        total_chunks = len(chunk_ranges)
        for idx, (st, ed) in enumerate(chunk_ranges, start=1):
            chunk_wav = None
            try:
                if ed is None:
                    chunk_wav = wav
                    note = "full file"
                else:
                    chunk_wav = trim_audio_segment(wav, st, ed)
                    note = f"{st:.1f}s - {ed:.1f}s"

                yield int(15 + (idx - 1) * 70 / max(1, total_chunks)), "", None, f"Transcribing chunk {idx}/{total_chunks} ({note})..."

                whisper_opts = {}
                # keep whisper_opts minimal to speed transcribe call; model implementation may ignore unknown opts
                result = model.transcribe(chunk_wav, **whisper_opts)
                chunk_text = result.get("text", "").strip()

                if mem_on:
                    chunk_text = memory_correct_text(chunk_text)

                chunk_text = postprocess_transcript(chunk_text)
                overall_text_parts.append(chunk_text)

                partial = "\n\n".join(overall_text_parts)
                prog = int(15 + idx * 70 / max(1, total_chunks))
                yield prog, partial, None, f"Completed chunk {idx}/{total_chunks}."
            except Exception as e:
                yield int(15 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_text_parts), None, f"Chunk {idx} failed: {e}\n{traceback.format_exc()}"
            finally:
                # BUGFIX: delete the per-chunk temp wav on BOTH success and
                # failure — the original only cleaned up on the success path,
                # leaking one temp file per failed chunk.
                if chunk_wav and chunk_wav != wav and os.path.exists(chunk_wav):
                    try:
                        os.unlink(chunk_wav)
                    except Exception:
                        pass

        # Final assembly + optional best-effort memory update.
        final_text = "\n\n".join([p for p in overall_text_parts if p])
        if mem_on:
            try:
                update_memory_with_transcript(final_text)
            except Exception:
                pass

        # Generate SRT if requested. Best-effort: the chunk results carry no
        # global timestamps, so the full file is re-transcribed for segments.
        srt_path = None
        if srt_on:
            try:
                full_result = model.transcribe(wav)
                segments = full_result.get("segments", []) or []
                if segments:
                    srt_text = segments_to_srt(segments)
                    srt_path = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(path))[0]}.srt")
                    with open(srt_path, "w", encoding="utf-8") as fh:
                        fh.write(srt_text)
            except Exception:
                srt_path = None

        yield 98, final_text, srt_path, "Transcription complete."

        # Clean up the converted wav only when conversion produced a NEW file.
        # BUGFIX: compare paths (as the file's other cleanup sites do with
        # `wav != p`) instead of the original's filename-suffix test, which
        # could delete a caller-owned file or leak a temp wav whose source
        # name already ended in ".wav".
        try:
            if wav and wav != path and os.path.exists(wav):
                os.unlink(wav)
        except Exception:
            pass

        yield 100, final_text, srt_path, "Done."
    except Exception as e:
        tb = traceback.format_exc()
        yield 100, "", None, f"Transcription failed: {e}\n{tb}"
|
| 816 |
|
| 817 |
# Wire the Transcribe button to the streaming generator: Gradio feeds each
# yielded tuple into (progress, transcript, SRT file, logs) in order.
transcribe_btn.click(
    fn=_single_generator,
    inputs=[
        single_audio,
        model_select,
        device_choice,
        mem_toggle,
        srt_toggle,
        use_two_pass_single,
        fast_model_choice,
        refine_threshold_single,
    ],
    outputs=[progress_num, transcript_out, srt_download, single_logs],
)
|
| 822 |
|
| 823 |
# Batch tab
|
|
|
|
| 978 |
gr.Markdown("- Two-pass helps when heavy model is slow.")
|
| 979 |
with gr.Column():
|
| 980 |
gr.Markdown("### Theme")
|
|
|
|
| 981 |
# Theme toggle widget: a button + hint rendered via raw HTML, with an inline
# script that persists the choice to localStorage and swaps a sun/moon icon.
gr.HTML("""
<div style="display:flex;align-items:center;gap:12px;">
  <button id="wt_theme_btn" style="display:flex;align-items:center;gap:8px;padding:8px 10px;border-radius:8px;border:1px solid rgba(0,0,0,0.06);background:var(--card);cursor:pointer;">
    <span id="wt_theme_icon" style="display:inline-flex;width:18px;height:18px;align-items:center;justify-content:center;"></span>
    <span id="wt_theme_label" style="font-weight:600;">Toggle Theme</span>
  </button>
  <div style="color:var(--muted);font-size:13px;">Theme preference saved in browser · <span id="wt_theme_hint">auto</span></div>
</div>
<script>
(function(){
  try {
    const root = document.documentElement;
    const btn = document.getElementById('wt_theme_btn');
    const icon = document.getElementById('wt_theme_icon');
    const hint = document.getElementById('wt_theme_hint');

    // Render the sun/moon glyph matching the active theme.
    function setIconFor(theme) {
      if (!icon) return;
      if (theme === 'dark') {
        icon.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M21 12.79A9 9 0 1111.21 3 7 7 0 0021 12.79z" fill="currentColor"/></svg>';
      } else {
        icon.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M12 4V2M12 22v-2M4.2 4.2L2.8 2.8M21.2 21.2l-1.4-1.4M4 12H2m20 0h-2M4.2 19.8L2.8 21.2M21.2 2.8L19.8 4.2" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/><circle cx="12" cy="12" r="3" fill="currentColor"/></svg>';
      }
    }

    // Effective theme: saved preference, then OS preference, then light.
    var saved = null;
    try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
    var effective = null;
    if (saved === 'dark' || saved === 'light') {
      effective = saved;
      hint.textContent = 'saved';
    } else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
      effective = 'dark';
      hint.textContent = 'OS-prefer';
    } else {
      effective = 'light';
      hint.textContent = 'OS-prefer';
    }
    root.setAttribute('data-theme', effective);
    setIconFor(effective);

    btn.addEventListener('click', function(){
      try {
        const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
        root.setAttribute('data-theme', cur);
        // BUGFIX: the original also wrote 'saved' in the catch branch, telling
        // the user the preference was persisted when localStorage failed.
        try { localStorage.setItem('wt_theme', cur); hint.textContent = 'saved'; } catch(e){ hint.textContent = 'not saved'; }
        setIconFor(cur);
      } catch(e){
        console.error(e);
      }
    });
  } catch(e){
    console.warn('theme toggle init failed', e);
  }
})();
</script>
""")
|