palli23 committed on
Commit
3b102fc
·
verified ·
1 Parent(s): af5ceec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -98
app.py CHANGED
@@ -1,132 +1,86 @@
1
- # app.py — Íslenskt ASR – ZeroGPU Fully Fixed (Dec 2025 handles str audio + CUDA safe)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
5
- # Disable CUDA visibility at startup to prevent main process init
6
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
7
 
8
  import gradio as gr
9
  import spaces
10
  from transformers import pipeline
11
  import torch
12
  import gc
13
- import librosa # For loading audio bytes if needed
14
- import io
15
- import soundfile as sf # For writing temp files from bytes
16
 
 
 
 
17
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
18
 
19
- @spaces.GPU(duration=180, max_batch_size=4)
20
- def transcribe_3min_gpu(audio_input):
21
- """
22
- Handles both filepath (str) and uploaded bytes/temp files.
23
- Loads model on CPU first, moves to GPU inside worker.
24
- """
25
- if not audio_input:
26
- return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
27
-
28
- # Handle str (filepath) vs bytes/tuple from Gradio upload
29
- if isinstance(audio_input, str):
30
- audio_path = audio_input
31
- else:
32
- # audio_input is tuple (filepath, tuple(bytes, sample_rate)) or just bytes
33
- if isinstance(audio_input, tuple) and len(audio_input) > 0:
34
- audio_path = audio_input[0] # Temp filepath from upload
35
- else:
36
- # Fallback: write bytes to temp file if no path
37
- if isinstance(audio_input, bytes):
38
- audio_bytes = audio_input
39
- else:
40
- return "Ógild hljóðskrá reyndu aftur"
41
-
42
- # Assume 16kHz sample rate for Whisper (common fallback)
43
- sample_rate = 16000
44
- # Load with librosa if needed, but for simplicity write to temp
45
- with io.BytesIO(audio_bytes) as audio_io:
46
- # Use soundfile to write temp wav
47
- with sf.SoundFile(audio_io, 'r') as f:
48
- data, sr = f.read(), f.samplerate
49
- if sr != 16000:
50
- data = librosa.resample(data, orig_sr=sr, target_sr=16000)
51
- # Write to temp file
52
- audio_path = "/tmp/temp_audio.wav"
53
- sf.write(audio_path, data, 16000)
54
-
55
  try:
56
- print("Loading Whisper model on CPU first (safe for ZeroGPU)...")
57
- # Load pipeline on CPU – no CUDA touch
58
- pipe = pipeline(
59
- "automatic-speech-recognition",
60
- model=MODEL_NAME,
61
- torch_dtype=torch.float16,
62
- device="cpu", # Critical: CPU init only
63
- token=os.getenv("HF_TOKEN"),
64
- )
65
-
66
- # Now inside GPU worker: move entire pipeline to CUDA
67
- print("Moving pipeline to GPU (ZeroGPU safe)...")
68
- pipe = pipe.to("cuda")
69
-
70
- # Run inference
71
- print("Running transcription...")
72
  result = pipe(
73
  audio_path,
74
  chunk_length_s=30,
75
  stride_length_s=(6, 0),
76
- batch_size=4, # Smaller batch for ZeroGPU stability
77
  return_timestamps=False,
78
- generate_kwargs={"language": "is", "task": "transcribe"},
79
  )
80
-
81
- text = result["text"].strip()
82
-
83
- # Cleanup
84
  if "chunks" in result:
85
  del result["chunks"]
86
-
87
- # Delete pipe immediately to free memory
88
- del pipe
89
  gc.collect()
90
- if torch.cuda.is_available():
91
- torch.cuda.empty_cache()
92
-
93
- return text if text else "(ekkert tal greint)"
94
-
95
  except torch.cuda.OutOfMemoryError:
 
 
96
  gc.collect()
97
- if torch.cuda.is_available():
98
- torch.cuda.empty_cache()
99
- return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
100
-
101
  except Exception as e:
102
- return f"Óvænt villa: {str(e)}"
 
103
 
104
- # ————————————————————— UI —————————————————————
105
- with gr.Blocks(title="Íslenskt ASR") as demo:
106
- gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
107
- gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
108
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
109
- gr.Markdown("> Keyrt á **ZeroGPU** – hver umritun hleðst nýtt (15–30 sek), stöðug og örugg")
110
 
111
  audio_in = gr.Audio(
112
- type="filepath", # Ensures str output for pipeline
113
- label="Hlaðið upp .mp3 / .wav (allt 5 mín)",
114
- sources=["upload", "microphone"]
115
  )
116
- btn = gr.Button("Umrita", variant="primary", size="lg")
117
- output = gr.Textbox(lines=25, label="Texti", placeholder="Hljóðtextinn birtist hér...")
118
-
119
- # Click event uses GPU-decorated fn
120
- btn.click(fn=transcribe_3min_gpu, inputs=audio_in, outputs=output)
121
 
122
- gr.Markdown("""
123
- ### Leiðbeiningar
124
- - Hver umritun: 15–30 sek (módel hleðst á GPU)
125
- - Styður upload og microphone – sjálfkrafa umbreytir í rétt format
126
- - Ef villa: bíddu og prófaðu aftur (ZeroGPU endurræsir)
127
- """)
128
 
129
- # ————————————————————— Launch —————————————————————
130
  demo.launch(
131
  auth=None,
132
  share=True,
 
1
+ # app.py — Íslenskt ASR – ZeroGPU ready (your original, just fixed for free tier)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
 
5
 
6
  import gradio as gr
7
  import spaces
8
  from transformers import pipeline
9
  import torch
10
  import gc
 
 
 
11
 
12
+ # ——————————————————————————————
13
+ # Model loaded ONLY when needed (ZeroGPU rule)
14
+ # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
 
17
+ @spaces.GPU(duration=180) # Request up to 180 s of ZeroGPU time for each call
18
+ def get_pipe():
19
+ return pipeline(
20
+ "automatic-speech-recognition",
21
+ model=MODEL_NAME,
22
+ torch_dtype="float16",
23
+ device=0, # GPU inside the worker
24
+ token=os.getenv("HF_TOKEN"),
25
+ )
26
+
27
+ # Global pipe — will be created on first use
28
+ pipe = None
29
+
30
+ # Your original transcription function (unchanged except for a small safety check)
31
+ def transcribe_3min(audio_path):
32
+ global pipe
33
+ if not audio_path:
34
+ return "Hlaðið upp hljóðskrá"
35
+
36
+ # Re-create pipe if something went wrong (OOM, crash, etc.)
37
+ if pipe is None:
38
+ print("Loading model (first use or refresh)...")
39
+ pipe = get_pipe()
40
+
 
 
 
 
 
 
 
 
 
 
 
 
41
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  result = pipe(
43
  audio_path,
44
  chunk_length_s=30,
45
  stride_length_s=(6, 0),
46
+ batch_size=8,
47
  return_timestamps=False,
 
48
  )
49
+ # Clean up memory aggressively (critical on ZeroGPU)
 
 
 
50
  if "chunks" in result:
51
  del result["chunks"]
 
 
 
52
  gc.collect()
53
+ torch.cuda.empty_cache()
54
+
55
+ return result["text"]
56
+
 
57
  except torch.cuda.OutOfMemoryError:
58
+ print("OOM → reloading model next run")
59
+ pipe = None
60
  gc.collect()
61
+ torch.cuda.empty_cache()
62
+ return "Of mikið minni → bíddu 10 sek og prófaðu aftur"
63
+
 
64
  except Exception as e:
65
+ pipe = None # Force reload next time
66
+ return f"Villa: {str(e)}"
67
 
68
+ # Your original UI — 100% unchanged —
69
+ with gr.Blocks() as demo:
70
+ gr.Markdown("# Íslenskt ASR – 3 mínútur")
71
+ gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt 5 mín hljóð**")
72
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
 
73
 
74
  audio_in = gr.Audio(
75
+ type="filepath",
76
+ label="Hlaðið upp .mp3 / .wav (max 5 mín)"
 
77
  )
78
+ btn = gr.Button("Transcribe", variant="primary", size="lg")
79
+ output = gr.Textbox(lines=30, label="Útskrift")
 
 
 
80
 
81
+ btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 
 
 
 
 
82
 
83
+ # Public launch —
84
  demo.launch(
85
  auth=None,
86
  share=True,