Bouaziz-bad committed on
Commit
f46eab4
·
1 Parent(s): 8a6c8ee

Introduce modular transcription app: frontend + backend

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. app.py +19 -29
  3. backend.py +31 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -11,7 +11,8 @@ tags:
11
  - audio
12
  - speech-to-text
13
  - whisper
14
- - transcription
15
  models:
16
  - openai/whisper-small
 
 
17
  ---
 
11
  - audio
12
  - speech-to-text
13
  - whisper
 
14
  models:
15
  - openai/whisper-small
16
+ preload_from_hub:
17
+ - openai/whisper-small
18
  ---
app.py CHANGED
@@ -1,43 +1,33 @@
 
1
  import gradio as gr
2
- import torch
3
- from transformers import pipeline
4
 
5
- # Load the Whisper model for speech recognition
6
- device = 0 if torch.cuda.is_available() else -1
7
- transcriber = pipeline(
8
- "automatic-speech-recognition",
9
- model="openai/whisper-small",
10
- chunk_length_s=30,
11
- device=device
12
- )
13
 
14
  def transcribe_audio(audio_file):
15
- """
16
- Transcribe uploaded audio file.
17
- Args:
18
- audio_file (str): Path to the audio file
19
- Returns:
20
- str: Transcribed text
21
- """
22
- if audio_file is None:
23
- return "Please upload an audio file."
24
-
25
- # Run transcription
26
- result = transcriber(audio_file)
27
- return result["text"]
28
 
29
- # Define the Gradio interface
30
  demo = gr.Interface(
31
  fn=transcribe_audio,
32
- inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
33
- outputs=gr.Textbox(label="Transcription", lines=8),
 
 
 
 
34
  title="🎙️ Audio Transcription with Whisper",
35
- description="Upload an audio file or record directly to transcribe it using OpenAI's Whisper model.",
36
  examples=[
37
- ["example.wav"] # Optional: add a sample audio file named example.wav
38
  ],
 
39
  allow_flagging="never"
40
  )
41
 
42
  # Launch the app
43
- demo.launch()
 
 
1
+ # app.py
2
  import gradio as gr
 
 
3
 
4
+ # Import backend
5
+ from backend import AudioTranscriber
6
+
7
+ # Initialize the backend transcriber
8
+ transcriber = AudioTranscriber()
 
 
 
9
 
10
  def transcribe_audio(audio_file):
11
+ return transcriber.transcribe(audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Define Gradio Interface
14
  demo = gr.Interface(
15
  fn=transcribe_audio,
16
+ inputs=gr.Audio(
17
+ label="Upload or Record Audio",
18
+ sources=["upload", "microphone"],
19
+ type="filepath"
20
+ ),
21
+ outputs=gr.Textbox(label="Transcription", lines=6, placeholder="Transcription will appear here..."),
22
  title="🎙️ Audio Transcription with Whisper",
23
+ description="Upload an audio file or record your voice. The backend will transcribe it using Whisper.",
24
  examples=[
25
+ ["example.wav"] # Optional: include a sample file
26
  ],
27
+ cache_examples=False,
28
  allow_flagging="never"
29
  )
30
 
31
  # Launch the app
32
+ if __name__ == "__main__":
33
+ demo.launch()
backend.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend.py
2
+ from transformers import pipeline
3
+ import torch
4
+
5
class AudioTranscriber:
    """Thin wrapper around a Hugging Face ASR pipeline (Whisper by default)."""

    def __init__(self, model_name="openai/whisper-small"):
        # transformers pipeline convention: device 0 = first CUDA GPU, -1 = CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        device_label = 'GPU' if self.device == 0 else 'CPU'
        print(f"Loading model '{model_name}' on device: {device_label}")
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            chunk_length_s=30,  # split long recordings into 30 s windows
            device=self.device,
        )

    def transcribe(self, audio_path):
        """Transcribe an audio file.

        Args:
            audio_path (str | None): Path to the audio file.

        Returns:
            str: Stripped transcribed text, a placeholder when no file was
                given, or a human-readable error message on failure.
        """
        if audio_path is None:
            return "No audio file provided."

        # Surface failures to the caller (UI) as text instead of crashing.
        try:
            output = self.pipe(audio_path)
        except Exception as err:
            return f"Transcription error: {str(err)}"
        return output.get("text", "").strip()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==2.3.1
2
+ transformers==4.41.2
3
+ gradio==4.25.0