Spaces:

hamza2923
/

TranscriberApp

Sleeping

App Files Files Community

hamza2923 committed on Apr 21, 2025

Commit

99e2ed9

verified ·

1 Parent(s): 5ece0ce

Upload 2 files

Browse files

Files changed (2) hide show

app.py +76 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import gradio as gr
+from transformers import pipeline
+import torch
+from pydub import AudioSegment
+import os
+# Initialize the Whisper model
+try:
+    whisper = pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-small",
+        device="cuda" if torch.cuda.is_available() else "cpu"
+    )
+except Exception as e:
+    raise Exception(f"Failed to load Whisper model: {str(e)}")
+# Define the transcription function with chunking and automatic language detection
+def transcribe_audio(audio):
+    if audio is None:
+        return "Error: Please upload an audio file."
+    # Validate file size (100 MB limit)
+    try:
+        file_size_mb = os.path.getsize(audio) / (1024 * 1024)
+        if file_size_mb > 100:
+            return "Error: Audio file exceeds 100 MB limit."
+    except FileNotFoundError:
+        return "Error: Audio file not found."
+    try:
+        # Load and process audio
+        audio_segment = AudioSegment.from_file(audio)
+        duration_ms = len(audio_segment)
+        chunk_length_ms = 30000  # 30 seconds
+        # Chunk long audio files
+        if duration_ms > chunk_length_ms:
+            chunks = [audio_segment[i:i + chunk_length_ms] for i in range(0, duration_ms, chunk_length_ms)]
+            transcriptions = []
+            for i, chunk in enumerate(chunks):
+                chunk_path = f"chunk_{i}.wav"
+                chunk.export(chunk_path, format="wav")
+                result = whisper(chunk_path, generate_kwargs={"task": "transcribe"})  # Automatic language detection
+                transcriptions.append(result["text"])
+                if os.path.exists(chunk_path):
+                    os.remove(chunk_path)
+            return " ".join(transcriptions)
+        else:
+            result = whisper(audio, generate_kwargs={"task": "transcribe"})  # Automatic language detection
+            return result["text"]
+    except Exception as e:
+        return f"Error during transcription: {str(e)}"
+    finally:
+        # Clean up uploaded file
+        if os.path.exists(audio):
+            try:
+                os.remove(audio)
+            except Exception:
+                pass
+# Create Gradio interface
+demo = gr.Interface(
+    fn=transcribe_audio,
+    inputs=[
+        gr.Audio(type="filepath", label="Upload an Audio File (MP3, WAV, max 100 MB)")
+    ],
+    outputs=gr.Textbox(label="Transcription"),
+    title="Audio to Text Transcription with Whisper",
+    description="Upload an audio file (MP3/WAV, up to 100 MB) to transcribe it using Open AI's Whisper model with automatic language detection.",
+    allow_flagging="never"
+)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers==4.44.2
+gradio==4.44.0
+torch==2.4.1
+pydub==0.25.1
+ffmpeg-python==0.2.0