Spaces:
Sleeping
Sleeping
Bouaziz-bad committed on
Commit ·
f46eab4
1
Parent(s): 8a6c8ee
Introduce modular transcription app: frontend + backend
Browse files- README.md +2 -1
- app.py +19 -29
- backend.py +31 -0
- requirements.txt +3 -0
README.md
CHANGED
|
@@ -11,7 +11,8 @@ tags:
|
|
| 11 |
- audio
|
| 12 |
- speech-to-text
|
| 13 |
- whisper
|
| 14 |
-
- transcription
|
| 15 |
models:
|
| 16 |
- openai/whisper-small
|
|
|
|
|
|
|
| 17 |
---
|
|
|
|
| 11 |
- audio
|
| 12 |
- speech-to-text
|
| 13 |
- whisper
|
|
|
|
| 14 |
models:
|
| 15 |
- openai/whisper-small
|
| 16 |
+
preload_from_hub:
|
| 17 |
+
- openai/whisper-small
|
| 18 |
---
|
app.py
CHANGED
|
@@ -1,43 +1,33 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import torch
|
| 3 |
-
from transformers import pipeline
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
chunk_length_s=30,
|
| 11 |
-
device=device
|
| 12 |
-
)
|
| 13 |
|
| 14 |
def transcribe_audio(audio_file):
|
| 15 |
-
|
| 16 |
-
Transcribe uploaded audio file.
|
| 17 |
-
Args:
|
| 18 |
-
audio_file (str): Path to the audio file
|
| 19 |
-
Returns:
|
| 20 |
-
str: Transcribed text
|
| 21 |
-
"""
|
| 22 |
-
if audio_file is None:
|
| 23 |
-
return "Please upload an audio file."
|
| 24 |
-
|
| 25 |
-
# Run transcription
|
| 26 |
-
result = transcriber(audio_file)
|
| 27 |
-
return result["text"]
|
| 28 |
|
| 29 |
-
# Define
|
| 30 |
demo = gr.Interface(
|
| 31 |
fn=transcribe_audio,
|
| 32 |
-
inputs=gr.Audio(
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
title="🎙️ Audio Transcription with Whisper",
|
| 35 |
-
description="Upload an audio file or record
|
| 36 |
examples=[
|
| 37 |
-
["example.wav"] # Optional:
|
| 38 |
],
|
|
|
|
| 39 |
allow_flagging="never"
|
| 40 |
)
|
| 41 |
|
| 42 |
# Launch the app
|
| 43 |
-
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
import gradio as gr
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
# Import backend
|
| 5 |
+
from backend import AudioTranscriber
|
| 6 |
+
|
| 7 |
+
# Initialize the backend transcriber
|
| 8 |
+
transcriber = AudioTranscriber()
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def transcribe_audio(audio_file):
|
| 11 |
+
return transcriber.transcribe(audio_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
# Define Gradio Interface
|
| 14 |
demo = gr.Interface(
|
| 15 |
fn=transcribe_audio,
|
| 16 |
+
inputs=gr.Audio(
|
| 17 |
+
label="Upload or Record Audio",
|
| 18 |
+
sources=["upload", "microphone"],
|
| 19 |
+
type="filepath"
|
| 20 |
+
),
|
| 21 |
+
outputs=gr.Textbox(label="Transcription", lines=6, placeholder="Transcription will appear here..."),
|
| 22 |
title="🎙️ Audio Transcription with Whisper",
|
| 23 |
+
description="Upload an audio file or record your voice. The backend will transcribe it using Whisper.",
|
| 24 |
examples=[
|
| 25 |
+
["example.wav"] # Optional: include a sample file
|
| 26 |
],
|
| 27 |
+
cache_examples=False,
|
| 28 |
allow_flagging="never"
|
| 29 |
)
|
| 30 |
|
| 31 |
# Launch the app
|
| 32 |
+
if __name__ == "__main__":
|
| 33 |
+
demo.launch()
|
backend.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend.py
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
class AudioTranscriber:
|
| 6 |
+
def __init__(self, model_name="openai/whisper-small"):
|
| 7 |
+
self.device = 0 if torch.cuda.is_available() else -1
|
| 8 |
+
print(f"Loading model '{model_name}' on device: {'GPU' if self.device == 0 else 'CPU'}")
|
| 9 |
+
self.pipe = pipeline(
|
| 10 |
+
"automatic-speech-recognition",
|
| 11 |
+
model=model_name,
|
| 12 |
+
chunk_length_s=30,
|
| 13 |
+
device=self.device
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
def transcribe(self, audio_path):
|
| 17 |
+
"""
|
| 18 |
+
Transcribe an audio file.
|
| 19 |
+
Args:
|
| 20 |
+
audio_path (str): Path to the audio file
|
| 21 |
+
Returns:
|
| 22 |
+
str: Transcribed text
|
| 23 |
+
"""
|
| 24 |
+
if audio_path is None:
|
| 25 |
+
return "No audio file provided."
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
result = self.pipe(audio_path)
|
| 29 |
+
return result.get("text", "").strip()
|
| 30 |
+
except Exception as e:
|
| 31 |
+
return f"Transcription error: {str(e)}"
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
torch==2.3.1
transformers==4.41.2
gradio==4.25.0