palli23 committed on
Commit
a8f1bf8
·
verified ·
1 Parent(s): a90df61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Batch file transcription (up to 25 files, Icelandic forced)
2
 
3
  import os
4
  import gc
@@ -23,12 +23,13 @@ def transcribe_files(audio_files):
23
  if not audio_files:
24
  return None, "Hlaðið upp hljóðskrám"
25
 
26
- audio_files = audio_files[:25] # ✅ up to 25
27
 
28
  workdir = tempfile.mkdtemp()
29
  outdir = os.path.join(workdir, "transcripts")
30
  os.makedirs(outdir, exist_ok=True)
31
 
 
32
  pipe = pipeline(
33
  "automatic-speech-recognition",
34
  model="palli23/whisper-small-sam_spjall",
@@ -36,7 +37,14 @@ def transcribe_files(audio_files):
36
  device=0,
37
  )
38
 
39
- for idx, file in enumerate(audio_files, start=1):
 
 
 
 
 
 
 
40
  audio_path = file.name
41
  base = os.path.splitext(os.path.basename(audio_path))[0]
42
  txt_path = os.path.join(outdir, f"{base}.txt")
@@ -47,8 +55,6 @@ def transcribe_files(audio_files):
47
  batch_size=8,
48
  return_timestamps=False,
49
  generate_kwargs={
50
- "language": "is",
51
- "task": "transcribe",
52
  "num_beams": 5,
53
  "repetition_penalty": 1.2,
54
  "no_repeat_ngram_size": 3,
@@ -104,7 +110,6 @@ with gr.Blocks() as demo:
104
  # Launch
105
  # ——————————————————————————————
106
  demo.launch(
107
- share=True,
108
  server_name="0.0.0.0",
109
  server_port=7860,
110
  )
 
1
+ # app.py — Batch file transcription (up to 25 files, Icelandic forced, HF-safe)
2
 
3
  import os
4
  import gc
 
23
  if not audio_files:
24
  return None, "Hlaðið upp hljóðskrám"
25
 
26
+ audio_files = audio_files[:25]
27
 
28
  workdir = tempfile.mkdtemp()
29
  outdir = os.path.join(workdir, "transcripts")
30
  os.makedirs(outdir, exist_ok=True)
31
 
32
+ # Create ASR pipeline
33
  pipe = pipeline(
34
  "automatic-speech-recognition",
35
  model="palli23/whisper-small-sam_spjall",
 
37
  device=0,
38
  )
39
 
40
+ # 🔧 PATCH generation config (critical fix)
41
+ gen_cfg = pipe.model.generation_config
42
+ gen_cfg.language = "is"
43
+ gen_cfg.task = "transcribe"
44
+ gen_cfg.forced_decoder_ids = None # prevent conflicts
45
+ gen_cfg.suppress_tokens = None # avoid tokenizer mismatch
46
+
47
+ for file in audio_files:
48
  audio_path = file.name
49
  base = os.path.splitext(os.path.basename(audio_path))[0]
50
  txt_path = os.path.join(outdir, f"{base}.txt")
 
55
  batch_size=8,
56
  return_timestamps=False,
57
  generate_kwargs={
 
 
58
  "num_beams": 5,
59
  "repetition_penalty": 1.2,
60
  "no_repeat_ngram_size": 3,
 
110
  # Launch
111
  # ——————————————————————————————
112
  demo.launch(
 
113
  server_name="0.0.0.0",
114
  server_port=7860,
115
  )