Brightsun10 commited on
Commit
ad8fa51
·
verified ·
1 Parent(s): 5cdc2ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -88
app.py CHANGED
@@ -1,89 +1,89 @@
1
- import gradio as gr
2
- import whisper
3
- import torch
4
- import time
5
-
6
- # --- MODEL INITIALIZATION ---
7
-
8
- # Check for GPU availability
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- print(f"Using device: {device}")
11
-
12
- # Load the Whisper model.
13
- # "base" is a good starting point. For higher accuracy, you can use "medium" or "large",
14
- # but they require more resources.
15
- print("Loading Whisper model...")
16
- model = whisper.load_model("base", device=device)
17
- print("Whisper model loaded successfully.")
18
-
19
-
20
- # --- TRANSCRIPTION FUNCTION ---
21
-
22
- def transcribe_audio(microphone_input, file_input):
23
- """
24
- Transcribes audio from either a microphone recording or an uploaded file.
25
-
26
- Args:
27
- microphone_input (tuple or None): Audio data from the microphone.
28
- file_input (str or None): Path to the uploaded audio file.
29
-
30
- Returns:
31
- str: The transcribed text.
32
- """
33
- # Determine the input source
34
- if microphone_input is not None:
35
- audio_source = microphone_input
36
- elif file_input is not None:
37
- audio_source = file_input
38
- else:
39
- return "No audio source provided. Please record or upload an audio file."
40
-
41
- # Perform the transcription
42
- try:
43
- # The transcribe function returns a dictionary with the text
44
- result = model.transcribe(audio_source)
45
- transcription = result["text"]
46
- return transcription
47
- except Exception as e:
48
- return f"An error occurred during transcription: {e}"
49
-
50
-
51
- # --- GRADIO INTERFACE ---
52
-
53
- # Use gr.Blocks for more complex layouts and custom styling
54
- with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
55
- gr.Markdown("# 🎙️ Professional Voice Recognition")
56
- gr.Markdown(
57
- "This application uses OpenAI's Whisper model to transcribe speech to text. "
58
- "You can either record audio directly from your microphone or upload an audio file."
59
- )
60
-
61
- with gr.Row(elem_classes="audio-container"):
62
- with gr.Column():
63
- # Microphone input
64
- mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
65
-
66
- # File upload input
67
- file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
68
-
69
- # Transcribe Button
70
- transcribe_button = gr.Button("Transcribe Audio")
71
-
72
- # Transcription Output
73
- output_text = gr.Textbox(
74
- lines=10,
75
- label="Transcription Result",
76
- placeholder="Your transcribed text will appear here...",
77
- elem_id="transcription_output"
78
- )
79
-
80
- # Define the action for the button click
81
- transcribe_button.click(
82
- fn=transcribe_audio,
83
- inputs=[mic_input, file_upload],
84
- outputs=output_text
85
- )
86
-
87
- # Launch the application
88
- if __name__ == "__main__":
89
  demo.launch(debug=True)
 
1
import gradio as gr
import whisper
import torch
import time  # NOTE(review): `time` appears unused anywhere in this file — confirm before removing

# --- MODEL INITIALIZATION ---

# Check for GPU availability; the model is placed on this device below.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Whisper model once at startup so every request reuses the same instance.
# "base" is a good starting point. For higher accuracy, you can use "medium" or "large",
# but they require more resources.
print("Loading Whisper model...")
model = whisper.load_model("base", device=device)
print("Whisper model loaded successfully.")
18
+
19
+
20
+ # --- TRANSCRIPTION FUNCTION ---
21
+
22
def transcribe_audio(microphone_input, file_input):
    """
    Transcribe speech from a microphone recording or an uploaded audio file.

    Args:
        microphone_input (str or None): Filepath of the recorded audio
            (the gr.Audio widget is configured with type="filepath").
        file_input (str or None): Filepath of the uploaded audio file.

    Returns:
        str: The transcribed text, or a human-readable error message.
    """
    # Prefer the microphone recording; fall back to the uploaded file.
    audio_source = microphone_input if microphone_input is not None else file_input
    if audio_source is None:
        return "No audio source provided. Please record or upload an audio file."

    try:
        # model.transcribe returns a dict; the full transcript is under "text".
        return model.transcribe(audio_source)["text"]
    except Exception as e:
        return f"An error occurred during transcription: {e}"
49
+
50
+
51
# --- GRADIO INTERFACE ---

# Use gr.Blocks for more complex layouts and custom styling.
# NOTE(review): css path is relative to the working directory — assumes assets/style.css exists.
with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Voice Recognition")
    gr.Markdown(
        "This application uses OpenAI's Whisper model to transcribe speech to text. "
        "You can either record audio directly from your microphone or upload an audio file."
    )

    with gr.Row(elem_classes="audio-container"):
        with gr.Column():
            # Microphone input — type="filepath" means the handler receives a path string.
            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")

            # File upload input — same filepath convention as the microphone widget.
            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")

    # Transcribe Button
    transcribe_button = gr.Button("Transcribe Audio")

    # Transcription Output
    output_text = gr.Textbox(
        lines=10,
        label="Transcription Result",
        placeholder="Your transcribed text will appear here...",
        elem_id="transcription_output"
    )

    # Clicking the button passes BOTH audio widgets to transcribe_audio;
    # the handler decides which source to use (microphone takes priority).
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[mic_input, file_upload],
        outputs=output_text
    )

# Launch the application only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch(debug=True)