palli23 committed on
Commit
7f5d9e6
·
verified ·
1 Parent(s): 45c12a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Íslenskt ASR – ZeroGPU Optimized + Auto-Refresh + Memory Safe
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
@@ -10,19 +10,18 @@ import torch
10
  import gc
11
 
12
  # ——————————————————————————————
13
- # Global pipeline will be created on first call, rebuilt if GPU dies
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
  pipe = None
17
 
18
- @spaces.GPU(duration=180, max_batch_size=4) # 3-minute safety net + small batches
19
  def get_or_refresh_pipeline():
20
  global pipe
21
 
22
- # If pipe exists but GPU ran out of memory → force rebuild
23
  if pipe is not None:
24
  try:
25
- # Quick health check
26
  _ = pipe.model.device
27
  except Exception:
28
  print("GPU context lost → rebuilding pipeline...")
@@ -31,21 +30,20 @@ def get_or_refresh_pipeline():
31
  torch.cuda.empty_cache()
32
 
33
  if pipe is None:
34
- print("Loading Whisper model (cold start ~20s)...")
35
  pipe = pipeline(
36
  "automatic-speech-recognition",
37
  model=MODEL_NAME,
38
  torch_dtype=torch.float16,
39
- device=0, # GPU 0
40
- token=os.getenv("HF_TOKEN"), # optional, only needed for private models
41
  )
42
- # Force aggressive memory cleanup after load
43
  torch.cuda.empty_cache()
44
 
45
  return pipe
46
 
47
  # ——————————————————————————————
48
- # Transcription function — super memory-safe
49
  # ——————————————————————————————
50
  def transcribe_3min(audio_path):
51
  if not audio_path:
@@ -60,53 +58,60 @@ def transcribe_3min(audio_path):
60
  stride_length_s=(6, 0),
61
  batch_size=8,
62
  return_timestamps=False,
63
- generate_kwargs={"language": "is", "task": "transcribe"}, # force Icelandic
64
  )
65
 
66
- # Aggressive cleanup after every inference
67
- del result["chunks"] if "chunks" in result else None
 
 
 
 
 
68
  gc.collect()
69
  torch.cuda.empty_cache()
70
 
71
- return result["text"].strip()
72
 
73
  except torch.cuda.OutOfMemoryError:
74
- print("OOM caught → forcing full pipeline reload on next call")
75
  global pipe
76
  pipe = None
77
  gc.collect()
78
  torch.cuda.empty_cache()
79
- return "Villa: Of mikið minni notað – endurhleð appinu og prófið aftur (ZeroGPU takmörkun)"
80
 
81
  except Exception as e:
82
- return f"Óvænt villa: {str(e)}"
83
 
84
  # ——————————————————————————————
85
- # Gradio UI – clean and reliable
86
  # ——————————————————————————————
87
  with gr.Blocks(title="Íslenskt ASR") as demo:
88
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
89
- gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
90
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
91
- gr.Markdown("> Keyrt á **ZeroGPU** – endurræsing eftir 3 mín óvirkni (eðlilegt)")
92
 
93
  audio_in = gr.Audio(
94
  type="filepath",
95
- label="Hlaðið upp .mp3 eða .wav (allt að 5 mínútur)",
96
  sources=["upload", "microphone"]
97
  )
98
  btn = gr.Button("Umrita", variant="primary", size="lg")
99
- output = gr.Textbox(lines=25, label="Texti", placeholder="Hljóðtextinn birtist hér...")
100
 
101
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
102
 
103
- gr.Markdown("### Athugasemdir\n"
104
- "- ZeroGPU endurræsist sjálfkrafa → fyrsta umritun tekur 15–30 sek\n"
105
- "- Eftir það mjög hröð (~5–15 sek fyrir 3 mín hljóð)\n"
106
- "- Ef þú sérð 'Of mikið minni' bíddu 10 sek og prófaðu aftur")
 
 
107
 
108
  # ——————————————————————————————
109
- # Launch – public, no login
110
  # ——————————————————————————————
111
  demo.launch(
112
  auth=None,
 
1
+ # app.py — Íslenskt ASR – ZeroGPU Optimized (fixed + battle-tested Dec 2025)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 
10
  import gc
11
 
12
  # ——————————————————————————————
13
+ # Global pipeline lazy load + auto-rebuild on OOM
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
  pipe = None
17
 
18
+ @spaces.GPU(duration=180, max_batch_size=4)
19
  def get_or_refresh_pipeline():
20
  global pipe
21
 
22
+ # Rebuild if GPU context died
23
  if pipe is not None:
24
  try:
 
25
  _ = pipe.model.device
26
  except Exception:
27
  print("GPU context lost → rebuilding pipeline...")
 
30
  torch.cuda.empty_cache()
31
 
32
  if pipe is None:
33
+ print("Loading Whisper model (cold start ~15-25s)...")
34
  pipe = pipeline(
35
  "automatic-speech-recognition",
36
  model=MODEL_NAME,
37
  torch_dtype=torch.float16,
38
+ device=0,
39
+ token=os.getenv("HF_TOKEN"),
40
  )
 
41
  torch.cuda.empty_cache()
42
 
43
  return pipe
44
 
45
  # ——————————————————————————————
46
+ # Transcription memory-safe + fixed del bug
47
  # ——————————————————————————————
48
  def transcribe_3min(audio_path):
49
  if not audio_path:
 
58
  stride_length_s=(6, 0),
59
  batch_size=8,
60
  return_timestamps=False,
61
+ generate_kwargs={"language": "is", "task": "transcribe"},
62
  )
63
 
64
+ text = result["text"].strip()
65
+
66
+ # Safely delete chunks if they exist (fixed!)
67
+ if "chunks" in result:
68
+ del result["chunks"]
69
+
70
+ # Aggressive cleanup
71
  gc.collect()
72
  torch.cuda.empty_cache()
73
 
74
+ return text if text else "(ekkert tal greint)"
75
 
76
  except torch.cuda.OutOfMemoryError:
77
+ print("OOM → forcing reload on next request")
78
  global pipe
79
  pipe = None
80
  gc.collect()
81
  torch.cuda.empty_cache()
82
+ return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
83
 
84
  except Exception as e:
85
+ return f"Villa: {str(e)}"
86
 
87
  # ——————————————————————————————
88
+ # UI
89
  # ——————————————————————————————
90
  with gr.Blocks(title="Íslenskt ASR") as demo:
91
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
92
+ gr.Markdown("**Whisper-small fínstillt á íslensku · mjög lágur WER**")
93
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
94
+ gr.Markdown("> Keyrt á **ZeroGPU** – fyrsta ræsing tekur 15–30 sek, síðan hröð")
95
 
96
  audio_in = gr.Audio(
97
  type="filepath",
98
+ label="Hlaðið upp .mp3 / .wav (allt að 5 mín)",
99
  sources=["upload", "microphone"]
100
  )
101
  btn = gr.Button("Umrita", variant="primary", size="lg")
102
+ output = gr.Textbox(lines=25, label="Texti")
103
 
104
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
105
 
106
+ gr.Markdown("""
107
+ ### Leiðbeiningar
108
+ - Fyrsta umritunin tekur lengur (model hleðst inn)
109
+ - Eftir það: 5–15 sek fyrir 3 mín hljóð
110
+ - Ef þú færð minnisvillu → bíddu öðruhvolf og prófaðu aftur
111
+ """)
112
 
113
  # ——————————————————————————————
114
+ # Launch
115
  # ——————————————————————————————
116
  demo.launch(
117
  auth=None,