Update app.py
app.py CHANGED
```diff
@@ -1,17 +1,16 @@
 import gradio as gr
 import torch
 from transformers import pipeline
-
-import time
+import librosa
 import os
 
-#
-MODEL_NAME = "openai/whisper-small"
+# Config
+MODEL_NAME = "openai/whisper-small"
 BATCH_SIZE = 8
-YT_LENGTH_LIMIT_S = 3600
+YT_LENGTH_LIMIT_S = 3600  # 1 hour
 device = 0 if torch.cuda.is_available() else "cpu"
 
-# Load
+# Load model pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
```
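The first hunk swaps the unused `time` import for `librosa` and fleshes out the bare comments. The pipeline configured here is easy to smoke-test outside Gradio; a minimal sketch, assuming a short local recording `sample.wav` (an illustrative filename, not a file in this Space):

```python
# Standalone smoke test for the same pipeline configuration.
# "sample.wav" is an illustrative assumption, not part of this repo.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else "cpu"  # CUDA device 0, else CPU
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    device=device,
)

out = asr("sample.wav", return_timestamps=True)
print(out["text"])            # the full transcript as one string
for chunk in out["chunks"]:   # segment timestamps, from return_timestamps=True
    print(chunk["timestamp"], chunk["text"])
```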
```diff
@@ -19,40 +18,61 @@ pipe = pipeline(
     device=device,
 )
 
-#
+# Duration check
+def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
+    duration = librosa.get_duration(path=audio_path)
+    return duration > limit
+
+# Transcription logic
 def transcribe(audio_path, task="transcribe"):
     if audio_path is None or not os.path.exists(audio_path):
-        raise gr.Error("Invalid file path.")
-
-    # Read the audio file using ffmpeg_read
-    audio_array = ffmpeg_read(audio_path, pipe.feature_extractor.sampling_rate)
+        raise gr.Error("❌ Invalid file path or missing audio.")
 
-
-
+    if is_too_long(audio_path):
+        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")
 
-
-    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    result = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
     return result["text"]
 
-# Wrapper
+# Wrapper with error handling
 def handle_audio(audio_path, task):
     try:
         return transcribe(audio_path, task)
     except Exception as e:
         return f"❌ Error: {str(e)}"
 
-# Gradio
+# Gradio App
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎙️
+    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")
 
     with gr.Tabs():
-
+        # Tab 1: Upload
+        with gr.Tab("📁 Upload Audio"):
+            with gr.Row():
+                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
+                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
             with gr.Row():
-
-
-
-            result = gr.Textbox(label="📝 Transcription", lines=8)
+                transcribe_btn_upload = gr.Button("Transcribe")
+                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
+            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)
 
-
+            gr.Examples(
+                examples=[
+                    "sample_audio/sample1.wav",
+                    "sample_audio/sample2.mp3"
+                ],
+                inputs=[audio_upload],
+                label="Example Audio Files"
+            )
+
+        # Tab 2: Record
+        with gr.Tab("🎤 Record Audio"):
+            with gr.Row():
+                audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
+                task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
+            with gr.Row():
+                transcribe_btn_record = gr.Button("Transcribe")
+                output_record = gr.Textbox(label="📝 Transcription", lines=8)
+            transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)
 
-demo.launch()
+demo.launch()
```
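The second hunk fixes the real bug: the old `transcribe` leaned on an `ffmpeg_read` helper that appears nowhere in the old imports, while the new version hands the file path straight to the pipeline and adds an `is_too_long` guard against files over `YT_LENGTH_LIMIT_S`. The guard can be verified in isolation; a small sketch, assuming `numpy` and `soundfile` are available (test-only assumptions, not imports of `app.py`):

```python
# Verify the duration guard against a synthetic 2-second file.
# numpy/soundfile are test-only assumptions; app.py needs only librosa here.
import numpy as np
import soundfile as sf
import librosa

def is_too_long(audio_path, limit):
    return librosa.get_duration(path=audio_path) > limit

sr = 16000
sf.write("tiny.wav", np.zeros(2 * sr, dtype=np.float32), sr)  # 2 s of silence

print(is_too_long("tiny.wav", limit=1))     # True: 2 s > 1 s
print(is_too_long("tiny.wav", limit=3600))  # False: far below the 1-hour cap
```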
The updated `app.py` in full:

```python
import gradio as gr
import torch
from transformers import pipeline
import librosa
import os

# Config
MODEL_NAME = "openai/whisper-small"
BATCH_SIZE = 8
YT_LENGTH_LIMIT_S = 3600  # 1 hour
device = 0 if torch.cuda.is_available() else "cpu"

# Load model pipeline
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    # (one unchanged argument is hidden by the diff view here)
    device=device,
)

# Duration check
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    duration = librosa.get_duration(path=audio_path)
    return duration > limit

# Transcription logic
def transcribe(audio_path, task="transcribe"):
    if audio_path is None or not os.path.exists(audio_path):
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    result = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
    return result["text"]

# Wrapper with error handling
def handle_audio(audio_path, task):
    try:
        return transcribe(audio_path, task)
    except Exception as e:
        return f"❌ Error: {str(e)}"

# Gradio App
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)

            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3"
                ],
                inputs=[audio_upload],
                label="Example Audio Files"
            )

        # Tab 2: Record
        with gr.Tab("🎤 Record Audio"):
            with gr.Row():
                audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
                task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_record = gr.Button("Transcribe")
                output_record = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()
```
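One caveat: `gr.Audio(source="microphone")` is the Gradio 3.x spelling; Gradio 4 renamed the parameter to `sources=["microphone"]`, so this file presumably targets a 3.x `gradio` pin. Functionally, both tabs route through `handle_audio`, and the task radio maps onto Whisper's two decoding modes: "transcribe" keeps the source language, while "translate" always decodes to English. A standalone sketch of the difference (the French recording is an illustrative assumption):

```python
# Whisper task modes, mirroring the Radio choices in the UI.
# "speech_fr.wav" stands in for any non-English recording.
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

in_french  = asr("speech_fr.wav", generate_kwargs={"task": "transcribe"})["text"]
in_english = asr("speech_fr.wav", generate_kwargs={"task": "translate"})["text"]
print(in_french)   # source-language transcript
print(in_english)  # English translation of the same audio
```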