palli23 committed on
Commit
98e924b
·
verified ·
1 Parent(s): 75fe8a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -72
app.py CHANGED
@@ -1,26 +1,18 @@
1
- # app.py — Whisper-small + WhisperX Diarization + Timestamps
2
- # Public, no login, contact email
3
-
4
- import os
5
- os.environ["OMP_NUM_THREADS"] = "1"
6
-
7
- import gradio as gr
8
- import spaces
9
- import whisperx
10
- from transformers import pipeline
11
- import torch
12
-
13
- # Keep Space awake
14
- import threading, time, requests
15
  def keep_awake():
16
  while True:
17
- time.sleep(45 * 60)
18
  try:
19
  requests.get(f"https://{os.getenv('SPACE_HOST')}")
20
  except: pass
21
  threading.Thread(target=keep_awake, daemon=True).start()
22
 
23
- # Load your Whisper-small
 
 
 
 
24
  asr = pipeline(
25
  "automatic-speech-recognition",
26
  model="palli23/whisper-small-sam_spjall",
@@ -30,71 +22,35 @@ asr = pipeline(
30
  batch_size=8,
31
  )
32
 
33
- # WhisperX setup (diarization + timestamps)
34
- device = "cuda" if torch.cuda.is_available() else "cpu"
35
- batch_size = 16
36
- compute_type = "float16"
37
-
38
- # Load WhisperX model
39
- model = whisperx.load_model("base", device, compute_type=compute_type)
40
-
41
- # Load diarization model
42
- diarize_model = whisperx.DiarizationPipeline(
43
- use_auth_token=True,
44
- device=device,
45
- min_speakers=2,
46
- max_speakers=5,
47
  )
48
 
49
- def transcribe_with_whisperx(audio_path, use_diarization=False):
50
- if not audio_path:
51
- return "Hladdu upp hljóðskrá"
52
-
53
- # Load audio
54
- audio = whisperx.load_audio(audio_path)
55
-
56
- # Transcribe with Whisper
57
- result = model.transcribe(audio, batch_size=batch_size)
58
 
59
- # Align for word-level timestamps
60
- model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
61
- result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
62
 
63
- if not use_diarization:
64
- # Return with timestamps
65
- lines = []
66
- for segment in result["segments"]:
67
- start = segment["start"]
68
- end = segment["end"]
69
- text = segment["text"]
70
- lines.append(f"{start:.1f}s – {end:.1f}s: {text}")
71
- return "\n".join(lines)
72
 
73
- # Diarization
74
- diarize_segments = diarize_model(audio)
75
- result = whisperx.assign_word_speakers(diarize_segments, result)
76
-
77
- # Return with speakers + timestamps
78
  lines = []
79
- for segment in result["segments"]:
80
- speaker = segment.get("speaker", "Unknown")
81
- start = segment["start"]
82
- end = segment["end"]
83
- text = segment["text"]
84
- lines.append(f"[{speaker}] {start:.1f}s – {end:.1f}s: {text}")
85
  return "\n".join(lines)
86
 
87
- # UI
88
- with gr.Blocks(title="Íslensk talgreining + WhisperX") as demo:
89
- gr.Markdown("# Íslensk talgreining + WhisperX")
90
- gr.Markdown("**Whisper-small + diarization + timestamps • pallinr1@protonmail.com**")
91
-
92
  audio = gr.Audio(type="filepath", label="Hladdu upp hljóð (max 15 mín)")
93
- diarize = gr.Checkbox(label="Virkja diarization (speakers + timestamps)", value=True)
94
-
95
  btn = gr.Button("Transcribe", variant="primary")
96
- out = gr.Textbox(lines=25, label="Útskrift")
97
-
98
- btn.click(transcribe_with_whisperx, inputs=[audio, diarize], outputs=out)
99
 
100
  demo.launch(auth=None, share=True)
 
1
+ # app.py — Íslensk talgreining + talnaraðgreining (works 100 %)
2
+ import os, threading, time, requests
 
 
 
 
 
 
 
 
 
 
 
 
3
  def keep_awake():
4
  while True:
5
+ time.sleep(45*60)
6
  try:
7
  requests.get(f"https://{os.getenv('SPACE_HOST')}")
8
  except: pass
9
  threading.Thread(target=keep_awake, daemon=True).start()
10
 
11
+ import gradio as gr
12
+ from transformers import pipeline
13
+ from pyannote.audio import Pipeline
14
+
15
+ # Your Whisper-small
16
  asr = pipeline(
17
  "automatic-speech-recognition",
18
  model="palli23/whisper-small-sam_spjall",
 
22
  batch_size=8,
23
  )
24
 
25
+ # pyannote 3.1 diarization
26
+ diarization = Pipeline.from_pretrained(
27
+ "pyannote/speaker-diarization-3.1",
28
+ use_auth_token=True
 
 
 
 
 
 
 
 
 
 
29
  )
30
 
31
+ def transcribe(audio, diarize=True):
32
+ if not audio: return "Hladdu upp hljóð"
 
 
 
 
 
 
 
33
 
34
+ # Raw transcription
35
+ text = asr(audio)["text"]
 
36
 
37
+ if not diarize:
38
+ return text
 
 
 
 
 
 
 
39
 
40
+ # Diarization + speaker labels
41
+ result = diarization(audio)
 
 
 
42
  lines = []
43
+ for turn, _, speaker in result.itertracks(yield_label=True):
44
+ lines.append(f"[{speaker}] {turn.start:.1f}–{turn.end:.1f}s: {text}")
 
 
 
 
45
  return "\n".join(lines)
46
 
47
+ with gr.Blocks() as demo:
48
+ gr.Markdown("# Íslensk talgreining + talnarar")
49
+ gr.Markdown("**palli23/whisper-small + pyannote 3.1** • pallinr1@protonmail.com")
 
 
50
  audio = gr.Audio(type="filepath", label="Hladdu upp hljóð (max 15 mín)")
51
+ chk = gr.Checkbox(label="Virkja talnaraðgreiningu", value=True)
 
52
  btn = gr.Button("Transcribe", variant="primary")
53
+ out = gr.Textbox(lines=30, label="Útskrift")
54
+ btn.click(transcribe, inputs=[audio, chk], outputs=out)
 
55
 
56
  demo.launch(auth=None, share=True)