palli23 committed on
Commit
399a407
·
1 Parent(s): c871a9c

diarization1Mæló

Browse files
Files changed (1) hide show
  1. app.py +19 -23
app.py CHANGED
@@ -1,7 +1,3 @@
1
- # ============================================================
2
- # app.py – Whisper-small + Pyannote 3.1 (ZeroGPU örugg)
3
- # ============================================================
4
-
5
  import os
6
  import gradio as gr
7
  import spaces
@@ -10,14 +6,19 @@ import torch
10
 
11
  from transformers import pipeline
12
  from pyannote.audio import Pipeline
13
- from torch.serialization import add_safe_globals
14
 
 
 
 
 
 
 
15
 
16
- # ================================================
17
- # Workaround fyrir PyTorch 2.6 weights-only unpickling
18
- # ================================================
19
  add_safe_globals({
20
- "Specifications": "pyannote.audio.core.task",
 
 
 
21
  })
22
 
23
 
@@ -32,8 +33,8 @@ def transcribe_with_diarization(audio_path):
32
  return "Hladdu upp hljóðskrá."
33
 
34
  # ----------------------------
35
- # 1. Load diarization pipeline
36
- # (ENGINN token parameter!)
37
  # ----------------------------
38
  diarization = Pipeline.from_pretrained(
39
  DIAR_MODEL,
@@ -43,7 +44,7 @@ def transcribe_with_diarization(audio_path):
43
  diar = diarization(audio_path)
44
 
45
  # ----------------------------
46
- # 2. Whisper ASR
47
  # ----------------------------
48
  asr = pipeline(
49
  task="automatic-speech-recognition",
@@ -51,9 +52,6 @@ def transcribe_with_diarization(audio_path):
51
  device=0
52
  )
53
 
54
- # ----------------------------
55
- # 3. Skera út segment + ASR
56
- # ----------------------------
57
  output_lines = []
58
 
59
  for turn, _, speaker in diar.itertracks(yield_label=True):
@@ -70,17 +68,15 @@ def transcribe_with_diarization(audio_path):
70
  return "\n".join(output_lines) or "Enginn texti fannst."
71
 
72
 
73
- # ------------------------------------------------------------
74
- # GRADIO UI
75
- # ------------------------------------------------------------
76
  with gr.Blocks() as demo:
77
- gr.Markdown("# 🎙️ Íslenskt ASR + mælendagreining")
78
- gr.Markdown("Whisper-small + pyannote 3.1 (ZeroGPU örugg útgáfa)")
79
-
80
  audio = gr.Audio(type="filepath", label="Hlaða inn hljóði (.wav / .mp3)")
81
- out = gr.Textbox(lines=30, label="Útskrift með mælendum")
82
 
83
- btn = gr.Button("Transcribe með mælendum", variant="primary")
84
  btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
85
 
86
  demo.launch(auth=("beta", "beta2025"))
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import spaces
 
6
 
7
  from transformers import pipeline
8
  from pyannote.audio import Pipeline
 
9
 
10
+ # ==========================================================
11
+ # ZeroGPU SAFE GLOBALS FIX — PYANNOTE 3.1 CHECKPOINT COMPAT
12
+ # ==========================================================
13
+ from torch.serialization import add_safe_globals
14
+ from pyannote.audio.core.task import Specifications
15
+ from pyannote.audio.core.model import Model
16
 
 
 
 
17
  add_safe_globals({
18
+ "Specifications": Specifications,
19
+ "pyannote.audio.core.task.Specifications": Specifications,
20
+ "Model": Model,
21
+ "pyannote.audio.core.model.Model": Model,
22
  })
23
 
24
 
 
33
  return "Hladdu upp hljóðskrá."
34
 
35
  # ----------------------------
36
+ # Load diarization pipeline
37
+ # (NO token argument!)
38
  # ----------------------------
39
  diarization = Pipeline.from_pretrained(
40
  DIAR_MODEL,
 
44
  diar = diarization(audio_path)
45
 
46
  # ----------------------------
47
+ # Whisper ASR
48
  # ----------------------------
49
  asr = pipeline(
50
  task="automatic-speech-recognition",
 
52
  device=0
53
  )
54
 
 
 
 
55
  output_lines = []
56
 
57
  for turn, _, speaker in diar.itertracks(yield_label=True):
 
68
  return "\n".join(output_lines) or "Enginn texti fannst."
69
 
70
 
71
+ # ==========================================================
72
+ # UI
73
+ # ==========================================================
74
  with gr.Blocks() as demo:
75
+ gr.Markdown("# 🎙️ Íslenskt ASR + mælendagreining (ZeroGPU)")
 
 
76
  audio = gr.Audio(type="filepath", label="Hlaða inn hljóði (.wav / .mp3)")
77
+ out = gr.Textbox(lines=25, label="Útskrift")
78
 
79
+ btn = gr.Button("Transcribe")
80
  btn.click(transcribe_with_diarization, inputs=audio, outputs=out)
81
 
82
  demo.launch(auth=("beta", "beta2025"))