palli23 committed on
Commit
45c12a4
·
verified ·
1 Parent(s): 9f7f868

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -36
app.py CHANGED
@@ -1,68 +1,116 @@
1
- # app.py — Íslenskt ASR – 3 mínútur (public, no login, with contact)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
5
 
6
  import gradio as gr
7
  import spaces
8
  from transformers import pipeline
 
 
9
 
10
  # ——————————————————————————————
11
- # Model loaded ONCE at startup (global)
12
  # ——————————————————————————————
13
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
 
14
 
15
- @spaces.GPU(duration=180)
16
- def get_pipe():
17
- return pipeline(
18
- "automatic-speech-recognition",
19
- model=MODEL_NAME,
20
- torch_dtype="float16",
21
- device=0,
22
- token=os.getenv("HF_TOKEN"),
23
- )
 
 
 
 
 
24
 
25
- pipe = get_pipe()
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # ——————————————————————————————
28
- # Transcription function
29
  # ——————————————————————————————
30
  def transcribe_3min(audio_path):
31
  if not audio_path:
32
- return "Hladdu upp hljóðskrá"
33
-
34
- result = pipe(
35
- audio_path,
36
- chunk_length_s=30,
37
- stride_length_s=(6, 0),
38
- batch_size=8,
39
- return_timestamps=False,
40
- )
41
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # ——————————————————————————————
44
- # UI only added your email, nothing else changed
45
  # ——————————————————————————————
46
- with gr.Blocks() as demo: # ← removed 'theme=' (was causing error)
47
- gr.Markdown("# Íslenskt ASR – 3 mínútur")
48
- gr.Markdown("**Whisper small· mjög lágur WER á prófunarupptökum · allt 5 mín hljóð**")
49
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
50
-
 
51
  audio_in = gr.Audio(
52
  type="filepath",
53
- label="Hladdu upp .mp3 / .wav (max 5 mín)"
 
54
  )
55
- btn = gr.Button("Transcribe", variant="primary", size="lg")
56
- output = gr.Textbox(lines=30, label="Útskrift")
57
-
58
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 
 
 
 
 
59
 
60
  # ——————————————————————————————
61
- # PUBLIC NO LOGIN, NO PASSWORD
62
  # ——————————————————————————————
63
  demo.launch(
64
- auth=None, # ← No login
65
- share=True, # ← Public
66
  server_name="0.0.0.0",
67
  server_port=7860,
68
  show_error=True,
 
1
# app.py — Íslenskt ASR – ZeroGPU Optimized + Auto-Refresh + Memory Safe
import os

# These must be exported before torch is loaded (transformers imports it
# below) or the thread/allocator settings are silently ignored.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"

import gradio as gr
import spaces
from transformers import pipeline
import torch
import gc

# ——————————————————————————————
# Global pipeline: created lazily on first call, rebuilt if the GPU dies
# ——————————————————————————————
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Module-level cache for the ASR pipeline; None means "not loaded yet".
pipe = None
18
# NOTE(review): dropped `max_batch_size=4` — the `spaces.GPU` decorator takes
# only `duration` (per the ZeroGPU docs); an unknown keyword raises TypeError
# at import time and the Space never starts.
@spaces.GPU(duration=180)  # 3-minute ZeroGPU allocation safety net
def get_or_refresh_pipeline():
    """Return the global ASR pipeline, (re)building it when needed.

    The pipeline is created lazily on the first call.  If a previous GPU
    context was torn down (ZeroGPU reclaims idle devices), the stale
    pipeline is discarded and rebuilt from scratch.

    Returns:
        The transformers automatic-speech-recognition pipeline on GPU 0.
    """
    global pipe

    # Health check: touching the model's device raises once the GPU context
    # backing the old pipeline has been destroyed → force a rebuild.
    if pipe is not None:
        try:
            _ = pipe.model.device
        except Exception:
            print("GPU context lost → rebuilding pipeline...")
            pipe = None
            gc.collect()
            torch.cuda.empty_cache()

    if pipe is None:
        print("Loading Whisper model (cold start ~20s)...")
        pipe = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device=0,  # ZeroGPU exposes a single device as cuda:0
            token=os.getenv("HF_TOKEN"),  # optional, only needed for private models
        )
        # Release allocator slack left over from model loading.
        torch.cuda.empty_cache()

    return pipe
46
 
47
  # ——————————————————————————————
48
+ # Transcription function — super memory-safe
49
  # ——————————————————————————————
50
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file to Icelandic text.

    Args:
        audio_path: Filesystem path from the gr.Audio component, or
            None/"" when nothing was uploaded.

    Returns:
        str: the transcription, or an Icelandic error message on failure.
    """
    # BUGFIX: `global` must precede any assignment to the name in this scope;
    # the original declared it inside the except block AFTER assigning `pipe`,
    # which is a SyntaxError ("assigned before global declaration").
    global pipe

    if not audio_path:
        return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"

    try:
        asr = get_or_refresh_pipeline()

        result = asr(
            audio_path,
            chunk_length_s=30,
            stride_length_s=(6, 0),
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={"language": "is", "task": "transcribe"},  # force Icelandic
        )

        # BUGFIX: `del x if cond else None` is invalid syntax; pop() removes
        # the key when present and is a no-op otherwise.
        result.pop("chunks", None)
        gc.collect()
        torch.cuda.empty_cache()

        return result["text"].strip()

    except torch.cuda.OutOfMemoryError:
        # Drop the cached pipeline so the next call does a clean cold start.
        print("OOM caught → forcing full pipeline reload on next call")
        pipe = None
        gc.collect()
        torch.cuda.empty_cache()
        return "Villa: Of mikið minni notað – endurhleð appinu og prófið aftur (ZeroGPU takmörkun)"

    except Exception as e:
        return f"Óvænt villa: {str(e)}"
83
 
84
  # ——————————————————————————————
85
+ # Gradio UI clean and reliable
86
  # ——————————————————————————————
87
with gr.Blocks(title="Íslenskt ASR") as demo:
    # Header, model blurb, and contact details.
    gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
    gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
    gr.Markdown("> Keyrt á **ZeroGPU** – endurræsing eftir 3 mín óvirkni (eðlilegt)")

    # Audio input: file upload or microphone, handed to the backend as a path.
    audio_in = gr.Audio(
        type="filepath",
        sources=["upload", "microphone"],
        label="Hlaðið upp .mp3 eða .wav (allt 5 mínútur)",
    )

    btn = gr.Button("Umrita", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Texti", placeholder="Hljóðtextinn birtist hér...")

    # Wire the button to the transcription function.
    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)

    # Footer notes about ZeroGPU cold-start and OOM behaviour.
    gr.Markdown(
        "### Athugasemdir\n"
        "- ZeroGPU endurræsist sjálfkrafa → fyrsta umritun tekur 15–30 sek\n"
        "- Eftir það mjög hröð (~5–15 sek fyrir 3 mín hljóð)\n"
        "- Ef þú sérð 'Of mikið minni' → bíddu 10 sek og prófaðu aftur"
    )
107
 
108
  # ——————————————————————————————
109
+ # Launch public, no login
110
  # ——————————————————————————————
111
  demo.launch(
112
+ auth=None,
113
+ share=True,
114
  server_name="0.0.0.0",
115
  server_port=7860,
116
  show_error=True,