palli23 committed on
Commit
6adf5a9
·
verified ·
1 Parent(s): 98e924b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -27
app.py CHANGED
@@ -1,5 +1,18 @@
1
- # app.py — Íslensk talgreining + talnaraðgreining (works 100 %)
2
- import os, threading, time, requests
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  def keep_awake():
4
  while True:
5
  time.sleep(45*60)
@@ -8,11 +21,7 @@ def keep_awake():
8
  except: pass
9
  threading.Thread(target=keep_awake, daemon=True).start()
10
 
11
- import gradio as gr
12
- from transformers import pipeline
13
- from pyannote.audio import Pipeline
14
-
15
- # Your Whisper-small
16
  asr = pipeline(
17
  "automatic-speech-recognition",
18
  model="palli23/whisper-small-sam_spjall",
@@ -22,35 +31,75 @@ asr = pipeline(
22
  batch_size=8,
23
  )
24
 
25
- # pyannote 3.1 diarization
26
- diarization = Pipeline.from_pretrained(
27
- "pyannote/speaker-diarization-3.1",
28
- use_auth_token=True
 
 
 
 
 
 
 
 
 
 
29
  )
30
 
31
- def transcribe(audio, diarize=True):
32
- if not audio: return "Hladdu upp hljóð"
 
 
 
 
 
 
 
 
 
 
33
 
34
- # Raw transcription
35
- text = asr(audio)["text"]
 
 
36
 
37
- if not diarize:
38
- return text
 
 
 
 
 
 
 
39
 
40
- # Diarization + speaker labels
41
- result = diarization(audio)
 
 
 
42
  lines = []
43
- for turn, _, speaker in result.itertracks(yield_label=True):
44
- lines.append(f"[{speaker}] {turn.start:.1f}–{turn.end:.1f}s: {text}")
 
 
 
 
45
  return "\n".join(lines)
46
 
47
- with gr.Blocks() as demo:
48
- gr.Markdown("# Íslensk talgreining + talnarar")
49
- gr.Markdown("**palli23/whisper-small + pyannote 3.1** • pallinr1@protonmail.com")
 
 
50
  audio = gr.Audio(type="filepath", label="Hladdu upp hljóð (max 15 mín)")
51
- chk = gr.Checkbox(label="Virkja talnaraðgreiningu", value=True)
 
52
  btn = gr.Button("Transcribe", variant="primary")
53
- out = gr.Textbox(lines=30, label="Útskrift")
54
- btn.click(transcribe, inputs=[audio, chk], outputs=out)
 
55
 
56
  demo.launch(auth=None, share=True)
 
1
+ # app.py — Íslensk talgreining with WhisperX Diarization & Timestamps
2
+ # Public, no login, pallinr1@protonmail.com
3
+
4
+ import os
5
+ os.environ["OMP_NUM_THREADS"] = "1"
6
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
7
+
8
+ import gradio as gr
9
+ import spaces
10
+ import whisperx
11
+ import torch
12
+ from transformers import pipeline
13
+
14
+ # Keep Space awake
15
+ import threading, time, requests
16
  def keep_awake():
17
  while True:
18
  time.sleep(45*60)
 
21
  except: pass
22
  threading.Thread(target=keep_awake, daemon=True).start()
23
 
24
+ # Load your Whisper-small
 
 
 
 
25
  asr = pipeline(
26
  "automatic-speech-recognition",
27
  model="palli23/whisper-small-sam_spjall",
 
31
  batch_size=8,
32
  )
33
 
34
+ # WhisperX setup (diarization + timestamps)
35
+ device = "cuda" if torch.cuda.is_available() else "cpu"
36
+ batch_size = 16
37
+ compute_type = "float16"
38
+
39
+ # Load WhisperX model (your Whisper-small)
40
+ model = whisperx.load_model("palli23/whisper-small-sam_spjall", device, compute_type=compute_type)
41
+
42
+ # Load diarization model (pyannote internal to WhisperX)
43
+ diarize_model = whisperx.DiarizationPipeline(
44
+ use_auth_token=True,
45
+ device=device,
46
+ min_speakers=1,
47
+ max_speakers=6,
48
  )
49
 
50
+ # Load alignment model (for timestamps)
51
+ align_model, metadata = whisperx.load_align_model(language_code="is", device=device)
52
+
53
+ def transcribe_with_whisperx(audio_path, use_diarization=True):
54
+ if not audio_path:
55
+ return "Hladdu upp hljóðskrá"
56
+
57
+ # Load audio
58
+ audio = whisperx.load_audio(audio_path)
59
+
60
+ # Transcribe with your model
61
+ result = model.transcribe(audio, batch_size=batch_size)
62
 
63
+ # Align for word-level timestamps
64
+ result = whisperx.align(
65
+ result["segments"], align_model, metadata, audio, device, return_char_alignments=False
66
+ )
67
 
68
+ if not use_diarization:
69
+ # Return with timestamps (no speakers)
70
+ lines = []
71
+ for segment in result["segments"]:
72
+ start = segment["start"]
73
+ end = segment["end"]
74
+ text = segment["text"]
75
+ lines.append(f"{start:.1f}s – {end:.1f}s: {text}")
76
+ return "\n".join(lines)
77
 
78
+ # Diarization
79
+ diarize_segments = diarize_model(audio)
80
+ result = whisperx.assign_word_speakers(diarize_segments, result)
81
+
82
+ # Return with speakers + timestamps
83
  lines = []
84
+ for segment in result["segments"]:
85
+ speaker = segment.get("speaker", "Unknown")
86
+ start = segment["start"]
87
+ end = segment["end"]
88
+ text = segment["text"]
89
+ lines.append(f"[{speaker}] {start:.1f}s – {end:.1f}s: {text}")
90
  return "\n".join(lines)
91
 
92
+ # UI public, no login, your email
93
+ with gr.Blocks(title="Íslensk talgreining + WhisperX") as demo:
94
+ gr.Markdown("# Íslensk talgreining + WhisperX")
95
+ gr.Markdown("**Whisper-small + diarization + timestamps • pallinr1@protonmail.com**")
96
+
97
  audio = gr.Audio(type="filepath", label="Hladdu upp hljóð (max 15 mín)")
98
+ diarize = gr.Checkbox(label="Virkja diarization + timestamps", value=True)
99
+
100
  btn = gr.Button("Transcribe", variant="primary")
101
+ out = gr.Textbox(lines=25, label="Útskrift", show_copy_button=True)
102
+
103
+ btn.click(transcribe_with_whisperx, inputs=[audio, diarize], outputs=out)
104
 
105
  demo.launch(auth=None, share=True)