Spaces:

FineToon
/

Ai-Audio-Text-To-Text

Sleeping

App Files Community

AiCoderv2 committed on Sep 15, 2025

Commit

494e59f

verified ·

1 Parent(s): ec0d1b9

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -52

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from transformers import pipeline
 import gradio as gr
 # Updated model options with 2 new models
 MODEL_OPTIONS = {
@@ -29,7 +30,7 @@ LANGUAGE_CODES = {
     "Dutch": "nl"
 }
-def transcribe_audio(audio_file, model_choice, task_choice, language_choice):
     # Initialize the pipeline with selected model
     model_name = MODEL_OPTIONS[model_choice]
     task = "translate" if task_choice == "Translate to English" else "transcribe"
@@ -44,18 +45,32 @@ def transcribe_audio(audio_file, model_choice, task_choice, language_choice):
     )
     # Generate kwargs for the pipeline
-    generate_kwargs = {"task": task}
     if language and task == "transcribe":
         generate_kwargs["language"] = language
     # Process audio file
-    result = pipe(
-        audio_file,
-        generate_kwargs=generate_kwargs,
-        return_timestamps=False
-    )
-    return result["text"]
 with gr.Blocks() as demo:
     gr.Markdown("# 🎵 Audio Transcription & Translation")
@@ -65,8 +80,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             audio_input = gr.Audio(
                 label="Audio Input",
-                type="filepath",
-                source="upload"
             )
             # Updated model selection with new models
@@ -120,48 +134,8 @@ with gr.Blocks() as demo:
     transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
-    # Updated function to handle new features
-    def process_audio(audio_file, model_choice, task_choice, language_choice, timestamp_choice, beam_size):
-        model_name = MODEL_OPTIONS[model_choice]
-        task = "translate" if task_choice == "Translate to English" else "transcribe"
-        language = LANGUAGE_CODES[language_choice]
-        pipe = pipeline(
-            "automatic-speech-recognition",
-            model=model_name,
-            chunk_length_s=30,
-            device=0 if torch.cuda.is_available() else -1
-        )
-        generate_kwargs = {
-            "task": task,
-            "num_beams": beam_size
-        }
-        if language and task == "transcribe":
-            generate_kwargs["language"] = language
-        # Process with or without timestamps
-        if timestamp_choice:
-            result = pipe(
-                audio_file,
-                generate_kwargs=generate_kwargs,
-                return_timestamps=True
-            )
-            timestamp_text = "\n".join([
-                f"[{chunk['timestamp'][0]:.2f}s -> {chunk['timestamp'][1]:.2f}s] {chunk['text']}"
-                for chunk in result.get("chunks", [])
-            ])
-            return result["text"], timestamp_text, gr.update(visible=True)
-        else:
-            result = pipe(
-                audio_file,
-                generate_kwargs=generate_kwargs,
-                return_timestamps=False
-            )
-            return result["text"], "", gr.update(visible=False)
     transcribe_btn.click(
-        process_audio,
         inputs=[audio_input, model_choice, task_choice, language_choice, timestamp_choice, beam_size],
         outputs=[text_output, timestamp_output, timestamp_output]
     )

 from transformers import pipeline
 import gradio as gr
+import torch
 # Updated model options with 2 new models
 MODEL_OPTIONS = {
     "Dutch": "nl"
 }
+def transcribe_audio(audio_file, model_choice, task_choice, language_choice, timestamp_choice, beam_size):
     # Initialize the pipeline with selected model
     model_name = MODEL_OPTIONS[model_choice]
     task = "translate" if task_choice == "Translate to English" else "transcribe"
     )
     # Generate kwargs for the pipeline
+    generate_kwargs = {
+        "task": task,
+        "num_beams": beam_size
+    }
     if language and task == "transcribe":
         generate_kwargs["language"] = language
     # Process audio file
+    if timestamp_choice:
+        result = pipe(
+            audio_file,
+            generate_kwargs=generate_kwargs,
+            return_timestamps=True
+        )
+        timestamp_text = "\n".join([
+            f"[{chunk['timestamp'][0]:.2f}s -> {chunk['timestamp'][1]:.2f}s] {chunk['text']}"
+            for chunk in result.get("chunks", [])
+        ])
+        return result["text"], timestamp_text, gr.update(visible=True)
+    else:
+        result = pipe(
+            audio_file,
+            generate_kwargs=generate_kwargs,
+            return_timestamps=False
+        )
+        return result["text"], "", gr.update(visible=False)
 with gr.Blocks() as demo:
     gr.Markdown("# 🎵 Audio Transcription & Translation")
         with gr.Column():
             audio_input = gr.Audio(
                 label="Audio Input",
+                type="filepath"
             )
             # Updated model selection with new models
     transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
     transcribe_btn.click(
+        transcribe_audio,
         inputs=[audio_input, model_choice, task_choice, language_choice, timestamp_choice, beam_size],
         outputs=[text_output, timestamp_output, timestamp_output]
     )