palli23 commited on
Commit
a0182fe
·
verified ·
1 Parent(s): cf36ab0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -85
app.py CHANGED
@@ -1,110 +1,70 @@
 
1
  import os
2
  os.environ["OMP_NUM_THREADS"] = "1"
3
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
4
 
5
  import gradio as gr
6
  import spaces
7
- import whisperx
8
 
9
- # -----------------------------
10
- # MODEL SETTINGS
11
- # -----------------------------
12
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
13
- HF_TOKEN = os.getenv("HF_TOKEN")
14
 
15
- # -----------------------------
16
- # LOAD MODELS ONCE (GPU)
17
- # -----------------------------
18
  @spaces.GPU(duration=180)
19
- def load_all_models():
20
- device = "cuda"
21
-
22
- # 1. Whisper-small model
23
- asr_model = whisperx.load_model(
24
- MODEL_NAME,
25
- device=device,
26
- compute_type="float16"
27
- )
28
-
29
- # 2. Alignment model
30
- align_model, metadata = whisperx.load_align_model(
31
- language_code="is",
32
- device=device
33
- )
34
-
35
- # 3. Diarization model (pyannote)
36
- diar_model = whisperx.DiarizationPipeline(
37
- model_name="pyannote/speaker-diarization-3.1",
38
- device=device,
39
- use_auth_token=HF_TOKEN
40
  )
41
 
42
- return asr_model, align_model, metadata, diar_model
43
-
44
- asr_model, align_model, align_metadata, diar_model = load_all_models()
45
-
46
-
47
- # -----------------------------
48
- # TRANSCRIPTION + DIARIZATION
49
- # -----------------------------
50
- def transcribe_is_with_diar(audio_path):
51
 
 
 
 
 
52
  if not audio_path:
53
  return "Hladdu upp hljóðskrá"
54
-
55
- # Load audio
56
- audio = whisperx.load_audio(audio_path)
57
-
58
- # --- 1. ASR with Whisper-small
59
- asr_result = asr_model.transcribe(
60
- audio,
61
- batch_size=8
62
- )
63
-
64
- # --- 2. Alignment (word timestamps)
65
- aligned = whisperx.align(
66
- asr_result["segments"],
67
- align_model,
68
- align_metadata,
69
- audio,
70
- device="cuda"
71
  )
 
72
 
73
- # --- 3. Diarization
74
- diarization = diar_model(audio)
75
-
76
- # --- 4. Merge diarization + words
77
- final = whisperx.assign_word_speakers(diarization, aligned)
78
-
79
- # Format output text
80
- output_lines = []
81
- for seg in final["segments"]:
82
- speaker = seg.get("speaker", "SPEAKER_00")
83
- text = seg.get("text", "")
84
- output_lines.append(f"[{speaker}] {text}")
85
-
86
- return "\n".join(output_lines)
87
-
88
-
89
- # -----------------------------
90
- # BUILD GRADIO UI
91
- # -----------------------------
92
- with gr.Blocks() as demo:
93
- gr.Markdown("# 🇮🇸 Íslenskt ASR + Raddgreining (Diarization)")
94
- gr.Markdown("**Whisper-small + WhisperX** — Hljóð allt að 5 mínútur")
95
 
96
  audio_in = gr.Audio(
97
  type="filepath",
98
- label="Hladdu upp hljóði (.mp3 / .wav)"
99
  )
100
- btn = gr.Button("Transcribe", variant="primary")
101
- output = gr.Textbox(lines=30, label="Útskrift með raddgreiningu")
102
 
103
- btn.click(fn=transcribe_is_with_diar, inputs=audio_in, outputs=output)
104
 
 
 
 
105
  demo.launch(
106
- auth=None,
107
- share=True,
108
  server_name="0.0.0.0",
109
- server_port=7860
110
- )
 
 
 
1
+ # app.py — Íslenskt ASR – 3 mínútur (public, no login, with contact)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
5
 
6
  import gradio as gr
7
  import spaces
8
+ from transformers import pipeline
9
 
10
+ # ——————————————————————————————
11
+ # Model loaded ONCE at startup (global)
12
+ # ——————————————————————————————
13
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
 
14
 
 
 
 
15
  @spaces.GPU(duration=180)
16
+ def get_pipe():
17
+ return pipeline(
18
+ "automatic-speech-recognition",
19
+ model=MODEL_NAME,
20
+ torch_dtype="float16",
21
+ device=0,
22
+ token=os.getenv("HF_TOKEN"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  )
24
 
25
+ pipe = get_pipe()
 
 
 
 
 
 
 
 
26
 
27
+ # ——————————————————————————————
28
+ # Transcription function
29
+ # ——————————————————————————————
30
+ def transcribe_3min(audio_path):
31
  if not audio_path:
32
  return "Hladdu upp hljóðskrá"
33
+
34
+ result = pipe(
35
+ audio_path,
36
+ chunk_length_s=30,
37
+ stride_length_s=(6, 0),
38
+ batch_size=8,
39
+ return_timestamps=False,
 
 
 
 
 
 
 
 
 
 
40
  )
41
+ return result["text"]
42
 
43
+ # ——————————————————————————————
44
+ # UI — only added your email, nothing else changed
45
+ # ——————————————————————————————
46
+ with gr.Blocks() as demo: # removed 'theme=' (was causing error)
47
+ gr.Markdown("# Íslenskt ASR – 3 mínútur")
48
+ gr.Markdown("**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
49
+ gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  audio_in = gr.Audio(
52
  type="filepath",
53
+ label="Hladdu upp .mp3 / .wav (max 5 mín)"
54
  )
55
+ btn = gr.Button("Transcribe", variant="primary", size="lg")
56
+ output = gr.Textbox(lines=30, label="Útskrift")
57
 
58
+ btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
59
 
60
+ # ——————————————————————————————
61
+ # PUBLIC — NO LOGIN, NO PASSWORD
62
+ # ——————————————————————————————
63
  demo.launch(
64
+ auth=None, # ← No login
65
+ share=True, # ← Public
66
  server_name="0.0.0.0",
67
+ server_port=7860,
68
+ show_error=True,
69
+ quiet=False
70
+ )