AiCoderv2 committed on
Commit
913772f
·
verified ·
1 Parent(s): 08bdda0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import numpy as np
from transformers import pipeline
4
+
5
# Build the Whisper speech-recognition pipeline once at import time so that
# every call to the transcription handler reuses the same loaded model.
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny",
)
7
+
8
def _to_mono_float32(data):
    """Return *data* as a mono float32 waveform.

    Integer PCM samples are divided by the maximum of their integer dtype;
    floating-point samples are passed through unscaled (assumed to already be
    in the [-1, 1] range -- TODO confirm against the audio source).
    Multi-dimensional input is averaged over axis 1, i.e. it is assumed to be
    laid out as (samples, channels).
    """
    samples = np.asarray(data)

    # The scale must come from the *original* dtype: averaging the channels
    # first would turn the array into float64, and np.iinfo() rejects floats.
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
    else:
        scale = 1.0

    samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Down-mix stereo (or any multi-channel input) to mono.
        samples = samples.mean(axis=1)

    return (samples / scale).astype(np.float32, copy=False)


def transcribe_audio(audio):
    """Transcribe a Gradio audio value with the module-level ``asr`` pipeline.

    Args:
        audio: ``None`` when nothing was uploaded/recorded, otherwise a
            ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
            array (mono, or multi-channel with channels on axis 1).

    Returns:
        The transcribed text on success. If no audio (or zero-length audio)
        was supplied, or if anything fails during processing, a
        human-readable message is returned instead of raising, so the UI
        always has a string to display.
    """
    no_audio_message = (
        "No audio provided. Please upload an audio file or record using the microphone."
    )
    if audio is None:
        return no_audio_message

    try:
        sr, data = audio
        samples = _to_mono_float32(data)
        if samples.size == 0:
            return no_audio_message

        # The pipeline does not accept a ``sampling_rate`` keyword argument;
        # the rate has to be passed together with the waveform in a dict so
        # the pipeline knows the input rate of the raw samples.
        result = asr({"sampling_rate": sr, "raw": samples})
        return result["text"]
    except Exception as e:  # UI boundary: report the failure as text.
        return f"Error during transcription: {e}"
27
+
28
# Gradio interface: audio input and button on the left, transcription on the
# right, followed by optional example clips and a short model description.
with gr.Blocks(title="Speech to Text") as demo:
    gr.Markdown("# 🎵 Speech to Text Transcription")
    gr.Markdown("Convert audio to text using OpenAI's Whisper model")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="numpy",
                label="Audio Input",
            )
            transcribe_btn = gr.Button("Transcribe Audio", variant="primary")

        with gr.Column():
            text_output = gr.Textbox(
                lines=10,
                label="Transcription",
                interactive=False,
            )

    # Run the transcription handler on the current audio value when clicked.
    transcribe_btn.click(
        transcribe_audio,
        inputs=audio_input,
        outputs=text_output,
    )

    # Only advertise example clips that are actually present next to the app;
    # referencing missing files would otherwise break the examples widget.
    example_files = [
        [path]
        for path in ("example_audio_1.wav", "example_audio_2.wav")
        if os.path.isfile(path)
    ]
    if example_files:
        gr.Examples(
            examples=example_files,
            inputs=[audio_input],
        )

    gr.Markdown("### About This Model")
    gr.Markdown("- **Model**: [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny)")
    gr.Markdown("- **Task**: Automatic Speech Recognition (ASR)")
    gr.Markdown("- **Capabilities**: Transcribes speech to text in multiple languages")
    gr.Markdown("- **Note**: First transcription may take 10-20 seconds (model loading)")
    gr.Markdown("- **Supported Formats**: WAV, MP3, M4A, FLAC")

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()