palli23 committed on
Commit
9648db0
·
verified ·
1 Parent(s): 9a5eb7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -55
app.py CHANGED
@@ -1,9 +1,7 @@
1
- # app.py — Íslenskt ASR – 3 mínútur (ZeroGPU ready, refreshes forever)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
5
- # Block CUDA init in main process (ZeroGPU requirement)
6
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
7
 
8
  import gradio as gr
9
  import spaces
@@ -12,83 +10,62 @@ import torch
12
  import gc
13
 
14
  # ——————————————————————————————
15
- # Model loaded INSIDE GPU worker only (no global init)
16
  # ——————————————————————————————
17
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
18
 
19
- @spaces.GPU(duration=180) # Auto-refreshes GPU after 3 min idle
20
- def get_pipe():
21
- # Load on CPU first (safe in main), move to GPU in worker
22
- pipe_cpu = pipeline(
 
 
 
23
  "automatic-speech-recognition",
24
  model=MODEL_NAME,
25
- torch_dtype="float16", # Use dtype=torch.float16 if deprecated warning persists
26
- device="cpu", # KEY: CPU init to avoid lazy CUDA in main
27
  token=os.getenv("HF_TOKEN"),
28
  )
29
- # Now in GPU worker: move to device=0
30
- pipe_gpu = pipe_cpu.to("cuda")
31
- del pipe_cpu # Free CPU memory
32
- return pipe_gpu
33
-
34
- # ——————————————————————————————
35
- # Transcription function (calls GPU only when needed)
36
- # ——————————————————————————————
37
- def transcribe_3min(audio_path):
38
- if not audio_path:
39
- return "Hlaðið upp hljóðskrá"
40
-
41
- try:
42
- # Get fresh pipe from GPU worker (loads/moves only here)
43
- pipe = get_pipe()
44
-
45
- result = pipe(
46
- audio_path,
47
- chunk_length_s=30,
48
- stride_length_s=(6, 0),
49
- batch_size=8,
50
- return_timestamps=False,
51
- )
52
-
53
- # Memory cleanup (critical for ZeroGPU)
54
- if "chunks" in result:
55
- del result["chunks"]
56
- gc.collect()
57
- torch.cuda.empty_cache()
58
-
59
- return result["text"]
60
-
61
- except torch.cuda.OutOfMemoryError:
62
- gc.collect()
63
- torch.cuda.empty_cache()
64
- return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
65
-
66
- except Exception as e:
67
- return f"Villa: {str(e)}"
68
 
69
  # ——————————————————————————————
70
  # UI — your original, unchanged
71
  # ——————————————————————————————
72
- with gr.Blocks() as demo:
73
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
74
- gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
75
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
76
-
77
  audio_in = gr.Audio(
78
  type="filepath",
79
  label="Hlaðið upp .mp3 / .wav (max 5 mín)"
80
  )
81
  btn = gr.Button("Transcribe", variant="primary", size="lg")
82
  output = gr.Textbox(lines=30, label="Útskrift")
83
-
84
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
85
 
86
  # ——————————————————————————————
87
  # PUBLIC — NO LOGIN, NO PASSWORD
88
  # ——————————————————————————————
89
  demo.launch(
90
- auth=None,
91
- share=True,
92
  server_name="0.0.0.0",
93
  server_port=7860,
94
  show_error=True,
 
1
+ # app.py — Íslenskt ASR – 3 mínútur (ZeroGPU, works forever, your original code!)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
 
5
 
6
  import gradio as gr
7
  import spaces
 
10
  import gc
11
 
12
  # ——————————————————————————————
13
+ # Model loaded ONLY inside GPU worker (ZeroGPU safe)
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
 
17
# duration=180 is the GPU time budget (in seconds) requested from ZeroGPU for
# each call of this function; it is not an idle keep-alive timer.
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file with the Icelandic Whisper model.

    The ASR pipeline is built inside this function, i.e. inside the
    ``@spaces.GPU`` call, so no CUDA work happens at import time.

    Args:
        audio_path: Filesystem path of the uploaded audio (the Gradio audio
            component is configured with ``type="filepath"``). A falsy value
            means nothing was uploaded.

    Returns:
        The transcribed text, or an Icelandic prompt asking the user to
        upload a file when ``audio_path`` is empty.

    Raises:
        Any exception raised while loading the model or running inference is
        propagated unchanged; GPU memory is still released first.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = None
    try:
        # Build the pipeline directly on GPU 0 in half precision.
        pipe = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device=0,
            token=os.getenv("HF_TOKEN"),
        )

        # Long audio is processed in 30 s chunks, batched 8 at a time.
        result = pipe(
            audio_path,
            chunk_length_s=30,
            stride_length_s=(6, 0),
            batch_size=8,
            return_timestamps=False,
        )

        # Only the text is returned; the rest of `result` is dropped with
        # the local scope.
        return result["text"]
    finally:
        # Release the model and cached CUDA blocks on success *and* failure,
        # so a failed request does not leave GPU memory allocated.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # ——————————————————————————————
49
  # UI — your original, unchanged
50
  # ——————————————————————————————
51
# Page layout: three Markdown header lines, an audio upload, a button and a
# large text box. Clicking the button sends the uploaded file's path to
# transcribe_3min and shows the returned text.
with gr.Blocks() as demo:  # no 'theme=' argument (it was causing an error)
    # Header: title, model blurb, contact address.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown(
        "**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**"
    )
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # Input is handed to the callback as a path on disk.
    audio_in = gr.Audio(label="Hlaðið upp .mp3 / .wav (max 5 mín)", type="filepath")
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    # Wire the button: audio path in, transcript out.
    btn.click(transcribe_3min, inputs=audio_in, outputs=output)
62
 
63
  # ——————————————————————————————
64
  # PUBLIC — NO LOGIN, NO PASSWORD
65
  # ——————————————————————————————
66
  demo.launch(
67
+ auth=None, # ← No login
68
+ share=True, # ← Public
69
  server_name="0.0.0.0",
70
  server_port=7860,
71
  show_error=True,