palli23 committed on
Commit
04cee61
·
1 Parent(s): d89e139

diarization

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – Whisper-small + Mælendagreining (pyannote 3.1) – ZeroGPU
2
  import os
3
  import gradio as gr
4
  import spaces
@@ -8,21 +8,20 @@ import tempfile
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
- # Mælendagreining – BESTA módel 2025 (þú hefur samþykkt license)
12
- @spaces.GPU(duration=120) # 120 sek max – nóg fyrir 5 mín hljóð
13
  def transcribe_with_diarization(audio_path):
14
  if not audio_path:
15
  return "Hladdu upp hljóðskrá"
16
 
17
- # 1. Mælendagreining (pyannote)
18
  diarization = Pipeline.from_pretrained(
19
  "pyannote/speaker-diarization-3.1",
20
- use_auth_token=os.getenv("HF_TOKEN")
21
  ).to("cuda")
22
 
23
  dia_result = diarization(audio_path)
24
 
25
- # 2. Whisper-small á hverjum mælandahluta
26
  asr = pipeline(
27
  "automatic-speech-recognition",
28
  model=MODEL_NAME,
@@ -32,31 +31,24 @@ def transcribe_with_diarization(audio_path):
32
 
33
  full_text = ""
34
  for turn, _, speaker in dia_result.itertracks(yield_label=True):
35
- start = turn.start
36
- end = turn.end
37
-
38
- # Klippa út segmentið
39
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
40
  dia_result.crop(audio_path, turn).export(tmp.name, format="wav")
41
  segment_path = tmp.name
42
 
43
  text = asr(segment_path)["text"].strip()
44
  full_text += f"[MÆLENDI {speaker}] {text}\n"
45
-
46
- os.unlink(segment_path) # hreinsa temp skrá
47
 
48
  return full_text or "Ekkert heyrt"
49
 
50
- # Gradio interface
51
- with gr.Blocks(title="Íslenskt ASR + Mælendagreining") as demo:
52
  gr.Markdown("# Íslenskt ASR + Mælendagreining")
53
- gr.Markdown("**Whisper-small + pyannote 3.1 · ~4 % WER + 95 % DIAR**")
54
- gr.Markdown("Fullkominn podcast-transcript með réttum mælendum")
55
 
56
- audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 5 mín)")
57
- btn = gr.Button("Transcribe með mælendum (40–90 sek)", variant="primary", size="lg")
58
- out = gr.Textbox(lines=35, label="Útskrift með mælendum")
59
 
60
- btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
61
 
62
  demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – Mælendagreining VIRKAR á ZeroGPU (2025 fix)
2
  import os
3
  import gradio as gr
4
  import spaces
 
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
+ @spaces.GPU(duration=120)
 
12
  def transcribe_with_diarization(audio_path):
13
  if not audio_path:
14
  return "Hladdu upp hljóðskrá"
15
 
16
+ # Mælendagreining – 2025 syntax
17
  diarization = Pipeline.from_pretrained(
18
  "pyannote/speaker-diarization-3.1",
19
+ token=os.getenv("HF_TOKEN") # ← FIX
20
  ).to("cuda")
21
 
22
  dia_result = diarization(audio_path)
23
 
24
+ # Whisper-small
25
  asr = pipeline(
26
  "automatic-speech-recognition",
27
  model=MODEL_NAME,
 
31
 
32
  full_text = ""
33
  for turn, _, speaker in dia_result.itertracks(yield_label=True):
 
 
 
 
34
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
35
  dia_result.crop(audio_path, turn).export(tmp.name, format="wav")
36
  segment_path = tmp.name
37
 
38
  text = asr(segment_path)["text"].strip()
39
  full_text += f"[MÆLENDI {speaker}] {text}\n"
40
+ os.unlink(segment_path)
 
41
 
42
  return full_text or "Ekkert heyrt"
43
 
44
+ with gr.Blocks() as demo:
 
45
  gr.Markdown("# Íslenskt ASR + Mælendagreining")
46
+ gr.Markdown("**Whisper-small + pyannote 3.1 · 2025 fix**")
 
47
 
48
+ audio = gr.Audio(type="filepath")
49
+ btn = gr.Button("Transcribe með mælendum", variant="primary")
50
+ out = gr.Textbox(lines=35)
51
 
52
+ btn.click(transcribe_with_diarization, audio, out)
53
 
54
  demo.launch(auth=("beta", "beta2025"))