TextGeneratorSmall

Sleeping

App Files Files Community

palli23 committed 18 days ago

Commit

a90df61

verified ·

1 Parent(s): 8e021af

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -26

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py — Batch file transcription (up to 10 files)
 import os
 import gc
@@ -10,9 +10,11 @@ import spaces
 from transformers import pipeline
 import torch
 # Process-wide settings written into os.environ at import time of app.py.
 # NOTE(review): both assignments come after `import torch` above; OMP_NUM_THREADS
 # is normally read when the OpenMP runtime starts, so confirm it still takes
 # effect when set this late.
 os.environ["OMP_NUM_THREADS"] = "1"
 # max_split_size_mb is an option of PyTorch's CUDA caching allocator; it caps the
 # size of cached blocks the allocator is willing to split (used to limit
 # fragmentation).
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 # ——————————————————————————————
 # ZeroGPU worker – model loaded once
 # ——————————————————————————————
@@ -20,85 +22,84 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 # Old side of the diff (context lines plus "-" lines).
 # Transcribes a batch of uploaded audio files and returns a 2-tuple:
 # (path to a zip holding one .txt per input, status message for the UI).
 # When nothing was uploaded it returns (None, prompt message) instead.
 # NOTE(review): the section header above says "model loaded once", but the
 # pipeline is constructed inside this function, i.e. on every call.
 def transcribe_files(audio_files):
     # Guard: None or an empty list means nothing to do.
     if not audio_files:
         return None, "Hlaðið upp hljóðskrám"
     # Only the first 10 uploads are processed; the rest are dropped without notice.
-    audio_files = audio_files[:10]
     # Fresh temp directory per call. Nothing in this function removes it, and the
     # returned zip lives inside it.
     workdir = tempfile.mkdtemp()
     outdir = os.path.join(workdir, "transcripts")
     os.makedirs(outdir, exist_ok=True)
-    # Create pipeline
     pipe = pipeline(
         "automatic-speech-recognition",
         model="palli23/whisper-small-sam_spjall",
         torch_dtype=torch.float16,
         device=0,
     )
-    # Force Icelandic language using tokenizer
-    forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="is", task="transcribe")
-    for file in audio_files:
         # Assumes each uploaded item exposes its on-disk path as `.name` — TODO
         # confirm against the Gradio version in use.
         audio_path = file.name
         # The output name is derived from the input's stem only, so two uploads with
         # the same stem (e.g. a.wav and a.mp3) write to the same .txt and the later
         # one overwrites the earlier one.
         base = os.path.splitext(os.path.basename(audio_path))[0]
         txt_path = os.path.join(outdir, f"{base}.txt")
         result = pipe(
             audio_path,
             chunk_length_s=30,
             batch_size=8,
             return_timestamps=False,
             generate_kwargs={
-                "forced_decoder_ids": forced_decoder_ids,
                 "num_beams": 5,
                 "repetition_penalty": 1.2,
                 "no_repeat_ngram_size": 3,
                 "temperature": 0.0,
             },
         )
         # Only the "text" field of the pipeline result is persisted.
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(result["text"].strip())
     # Zip outputs
     # Entries are stored flat (arcname is the bare file name, no directory part).
     zip_path = os.path.join(workdir, "transcripts.zip")
     with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
         for fname in os.listdir(outdir):
             z.write(os.path.join(outdir, fname), arcname=fname)
     # Cleanup
     # Runs only on the success path: there is no try/finally, so an exception
     # raised earlier skips these three statements.
     del pipe
     gc.collect()
     torch.cuda.empty_cache()
-    return zip_path, "Lokið ✅"
 # ——————————————————————————————
 # UI
 # ——————————————————————————————
 # Old side of the diff (context lines plus "-" lines).
 # Builds the Gradio Blocks UI: two Markdown headers, a multi-file upload, a
 # trigger button, a file output for the zip and a read-only status box.
 with gr.Blocks() as demo:
-    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
     gr.Markdown(
-        "**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3"
     )
     # Multi-file upload; file_types is set to .wav and .mp3.
     audio_in = gr.File(
-        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
         file_types=[".wav", ".mp3"],
         file_count="multiple",
     )
     btn = gr.Button("Transcribe", variant="primary", size="lg")
     zip_out = gr.File(label="Niðurhal – transcripts.zip")
     status = gr.Textbox(label="Staða", interactive=False)
     # The two outputs line up with the (zip_path, message) tuple that
     # transcribe_files returns.
     btn.click(
         fn=transcribe_files,
         inputs=audio_in,
         outputs=[zip_out, status],
     )
 # ——————————————————————————————
 # Launch
 # ——————————————————————————————
@@ -106,4 +107,4 @@ demo.launch(
     share=True,
     server_name="0.0.0.0",
     server_port=7860,
-)

+# app.py — Batch file transcription (up to 25 files, Icelandic forced)
 import os
 import gc
 from transformers import pipeline
 import torch
+# Environment safety
 # Process-wide settings written into os.environ at import time of app.py.
 # NOTE(review): both assignments come after `import torch` above; OMP_NUM_THREADS
 # is normally read when the OpenMP runtime starts, so confirm it still takes
 # effect when set this late.
 os.environ["OMP_NUM_THREADS"] = "1"
 # max_split_size_mb is an option of PyTorch's CUDA caching allocator; it caps the
 # size of cached blocks the allocator is willing to split (used to limit
 # fragmentation).
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 # ——————————————————————————————
 # ZeroGPU worker – model loaded once
 # ——————————————————————————————
 # New side of the diff (context lines plus "+" lines).
 # Transcribes a batch of uploaded audio files and returns a 2-tuple:
 # (path to a zip holding one .txt per input, status message for the UI).
 # When nothing was uploaded it returns (None, prompt message) instead.
 # NOTE(review): the section header above says "model loaded once", but the
 # pipeline is constructed inside this function, i.e. on every call.
 def transcribe_files(audio_files):
     # Guard: None or an empty list means nothing to do.
     if not audio_files:
         return None, "Hlaðið upp hljóðskrám"
     # Only the first 25 uploads are processed; the rest are dropped without notice.
+    audio_files = audio_files[:25]  # ✅ up to 25
     # Fresh temp directory per call. Nothing in this function removes it, and the
     # returned zip lives inside it.
     workdir = tempfile.mkdtemp()
     outdir = os.path.join(workdir, "transcripts")
     os.makedirs(outdir, exist_ok=True)
     pipe = pipeline(
         "automatic-speech-recognition",
         model="palli23/whisper-small-sam_spjall",
         torch_dtype=torch.float16,
         device=0,
     )
     # NOTE(review): `idx` from enumerate is not used anywhere in the loop body.
+    for idx, file in enumerate(audio_files, start=1):
         # Assumes each uploaded item exposes its on-disk path as `.name` — TODO
         # confirm against the Gradio version in use.
         audio_path = file.name
         # The output name is derived from the input's stem only, so two uploads with
         # the same stem (e.g. a.wav and a.mp3) write to the same .txt and the later
         # one overwrites the earlier one.
         base = os.path.splitext(os.path.basename(audio_path))[0]
         txt_path = os.path.join(outdir, f"{base}.txt")
         result = pipe(
             audio_path,
             chunk_length_s=30,
             batch_size=8,
             return_timestamps=False,
             generate_kwargs={
                 # Language and task are passed straight through generate_kwargs.
+                "language": "is",
+                "task": "transcribe",
                 "num_beams": 5,
                 "repetition_penalty": 1.2,
                 "no_repeat_ngram_size": 3,
                 "temperature": 0.0,
             },
         )
         # Only the "text" field of the pipeline result is persisted.
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(result["text"].strip())
     # Zip outputs
     # Entries are stored flat (arcname is the bare file name, no directory part).
     zip_path = os.path.join(workdir, "transcripts.zip")
     with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
         for fname in os.listdir(outdir):
             z.write(os.path.join(outdir, fname), arcname=fname)
     # Cleanup
     # Runs only on the success path: there is no try/finally, so an exception
     # raised earlier skips these three statements.
     del pipe
     gc.collect()
     torch.cuda.empty_cache()
     # The count in the message is the number of inputs after truncation; it is
     # not the number of distinct .txt files written.
+    return zip_path, f"Lokið ✅ ({len(audio_files)} skrár)"
 # ——————————————————————————————
 # UI
 # ——————————————————————————————
 # New side of the diff (context lines plus "+" lines).
 # Builds the Gradio Blocks UI: two Markdown headers, a multi-file upload, a
 # trigger button, a file output for the zip and a read-only status box.
 with gr.Blocks() as demo:
+    gr.Markdown("# Íslenskt ASR – Batch (allt að 25 skrár)")
     gr.Markdown(
+        "**palli23/whisper-small-sam_spjall** · íslenska föst · .wav / .mp3"
     )
     # Multi-file upload; file_types is set to .wav and .mp3.
     audio_in = gr.File(
+        label="Hlaðið upp allt að 25 .wav / .mp3 skrám",
         file_types=[".wav", ".mp3"],
         file_count="multiple",
     )
     btn = gr.Button("Transcribe", variant="primary", size="lg")
     zip_out = gr.File(label="Niðurhal – transcripts.zip")
     status = gr.Textbox(label="Staða", interactive=False)
     # The two outputs line up with the (zip_path, message) tuple that
     # transcribe_files returns.
     btn.click(
         fn=transcribe_files,
         inputs=audio_in,
         outputs=[zip_out, status],
     )
 # ——————————————————————————————
 # Launch
 # ——————————————————————————————
     share=True,
     server_name="0.0.0.0",
     server_port=7860,
+)