palli23 committed on
Commit
a90df61
·
verified ·
1 Parent(s): 8e021af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -26
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Batch file transcription (up to 10 files)
2
 
3
  import os
4
  import gc
@@ -10,9 +10,11 @@ import spaces
10
  from transformers import pipeline
11
  import torch
12
 
 
13
  os.environ["OMP_NUM_THREADS"] = "1"
14
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
15
 
 
16
  # ——————————————————————————————
17
  # ZeroGPU worker – model loaded once
18
  # ——————————————————————————————
@@ -20,85 +22,84 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
20
  def transcribe_files(audio_files):
21
  if not audio_files:
22
  return None, "Hlaðið upp hljóðskrám"
23
-
24
- audio_files = audio_files[:10]
25
-
26
  workdir = tempfile.mkdtemp()
27
  outdir = os.path.join(workdir, "transcripts")
28
  os.makedirs(outdir, exist_ok=True)
29
-
30
- # Create pipeline
31
  pipe = pipeline(
32
  "automatic-speech-recognition",
33
  model="palli23/whisper-small-sam_spjall",
34
  torch_dtype=torch.float16,
35
  device=0,
36
  )
37
-
38
- # Force Icelandic language using tokenizer
39
- forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="is", task="transcribe")
40
-
41
- for file in audio_files:
42
  audio_path = file.name
43
  base = os.path.splitext(os.path.basename(audio_path))[0]
44
  txt_path = os.path.join(outdir, f"{base}.txt")
45
-
46
  result = pipe(
47
  audio_path,
48
  chunk_length_s=30,
49
  batch_size=8,
50
  return_timestamps=False,
51
  generate_kwargs={
52
- "forced_decoder_ids": forced_decoder_ids,
 
53
  "num_beams": 5,
54
  "repetition_penalty": 1.2,
55
  "no_repeat_ngram_size": 3,
56
  "temperature": 0.0,
57
  },
58
  )
59
-
60
  with open(txt_path, "w", encoding="utf-8") as f:
61
  f.write(result["text"].strip())
62
-
63
  # Zip outputs
64
  zip_path = os.path.join(workdir, "transcripts.zip")
65
  with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
66
  for fname in os.listdir(outdir):
67
  z.write(os.path.join(outdir, fname), arcname=fname)
68
-
69
  # Cleanup
70
  del pipe
71
  gc.collect()
72
  torch.cuda.empty_cache()
73
-
74
- return zip_path, "Lokið ✅"
 
75
 
76
  # ——————————————————————————————
77
  # UI
78
  # ——————————————————————————————
79
  with gr.Blocks() as demo:
80
- gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
81
  gr.Markdown(
82
- "**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3"
83
  )
84
-
85
  audio_in = gr.File(
86
- label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
87
  file_types=[".wav", ".mp3"],
88
  file_count="multiple",
89
  )
90
-
91
  btn = gr.Button("Transcribe", variant="primary", size="lg")
92
-
93
  zip_out = gr.File(label="Niðurhal – transcripts.zip")
94
  status = gr.Textbox(label="Staða", interactive=False)
95
-
96
  btn.click(
97
  fn=transcribe_files,
98
  inputs=audio_in,
99
  outputs=[zip_out, status],
100
  )
101
 
 
102
  # ——————————————————————————————
103
  # Launch
104
  # ——————————————————————————————
@@ -106,4 +107,4 @@ demo.launch(
106
  share=True,
107
  server_name="0.0.0.0",
108
  server_port=7860,
109
- )
 
1
+ # app.py — Batch file transcription (up to 25 files, Icelandic forced)
2
 
3
  import os
4
  import gc
 
10
  from transformers import pipeline
11
  import torch
12
 
13
+ # Environment safety
14
  os.environ["OMP_NUM_THREADS"] = "1"
15
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
16
 
17
+
18
  # ——————————————————————————————
19
  # ZeroGPU worker – pipeline is rebuilt on every call
20
  # ——————————————————————————————
 
22
  def transcribe_files(audio_files):
23
  if not audio_files:
24
  return None, "Hlaðið upp hljóðskrám"
25
+
26
+ audio_files = audio_files[:25] # ✅ up to 25
27
+
28
  workdir = tempfile.mkdtemp()
29
  outdir = os.path.join(workdir, "transcripts")
30
  os.makedirs(outdir, exist_ok=True)
31
+
 
32
  pipe = pipeline(
33
  "automatic-speech-recognition",
34
  model="palli23/whisper-small-sam_spjall",
35
  torch_dtype=torch.float16,
36
  device=0,
37
  )
38
+
39
+ for idx, file in enumerate(audio_files, start=1):
 
 
 
40
  audio_path = file.name
41
  base = os.path.splitext(os.path.basename(audio_path))[0]
42
  txt_path = os.path.join(outdir, f"{base}.txt")
43
+
44
  result = pipe(
45
  audio_path,
46
  chunk_length_s=30,
47
  batch_size=8,
48
  return_timestamps=False,
49
  generate_kwargs={
50
+ "language": "is",
51
+ "task": "transcribe",
52
  "num_beams": 5,
53
  "repetition_penalty": 1.2,
54
  "no_repeat_ngram_size": 3,
55
  "temperature": 0.0,
56
  },
57
  )
58
+
59
  with open(txt_path, "w", encoding="utf-8") as f:
60
  f.write(result["text"].strip())
61
+
62
  # Zip outputs
63
  zip_path = os.path.join(workdir, "transcripts.zip")
64
  with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
65
  for fname in os.listdir(outdir):
66
  z.write(os.path.join(outdir, fname), arcname=fname)
67
+
68
  # Cleanup
69
  del pipe
70
  gc.collect()
71
  torch.cuda.empty_cache()
72
+
73
+ return zip_path, f"Lokið ✅ ({len(audio_files)} skrár)"
74
+
75
 
76
  # ——————————————————————————————
77
  # UI
78
  # ——————————————————————————————
79
  with gr.Blocks() as demo:
80
+ gr.Markdown("# Íslenskt ASR – Batch (allt að 25 skrár)")
81
  gr.Markdown(
82
+ "**palli23/whisper-small-sam_spjall** · íslenska föst · .wav / .mp3"
83
  )
84
+
85
  audio_in = gr.File(
86
+ label="Hlaðið upp allt að 25 .wav / .mp3 skrám",
87
  file_types=[".wav", ".mp3"],
88
  file_count="multiple",
89
  )
90
+
91
  btn = gr.Button("Transcribe", variant="primary", size="lg")
92
+
93
  zip_out = gr.File(label="Niðurhal – transcripts.zip")
94
  status = gr.Textbox(label="Staða", interactive=False)
95
+
96
  btn.click(
97
  fn=transcribe_files,
98
  inputs=audio_in,
99
  outputs=[zip_out, status],
100
  )
101
 
102
+
103
  # ——————————————————————————————
104
  # Launch
105
  # ——————————————————————————————
 
107
  share=True,
108
  server_name="0.0.0.0",
109
  server_port=7860,
110
+ )