Spaces:

Brightsun10
/

voice-recognition

Sleeping

App Files Files Community

Brightsun10 committed on Jul 12, 2025

Commit

ad8fa51

verified ·

1 Parent(s): 5cdc2ab

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -88

app.py CHANGED Viewed

@@ -1,89 +1,89 @@
-import gradio as gr
-import whisper
-import torch
-import time
-# --- MODEL INITIALIZATION ---
-# Check for GPU availability
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Load the Whisper model.
-# "base" is a good starting point. For higher accuracy, you can use "medium" or "large",
-# but they require more resources.
-print("Loading Whisper model...")
-model = whisper.load_model("base", device=device)
-print("Whisper model loaded successfully.")
-# --- TRANSCRIPTION FUNCTION ---
-def transcribe_audio(microphone_input, file_input):
-    """
-    Transcribes audio from either a microphone recording or an uploaded file.
-    Args:
-        microphone_input (tuple or None): Audio data from the microphone.
-        file_input (str or None): Path to the uploaded audio file.
-    Returns:
-        str: The transcribed text.
-    """
-    # Determine the input source
-    if microphone_input is not None:
-        audio_source = microphone_input
-    elif file_input is not None:
-        audio_source = file_input
-    else:
-        return "No audio source provided. Please record or upload an audio file."
-    # Perform the transcription
-    try:
-        # The transcribe function returns a dictionary with the text
-        result = model.transcribe(audio_source)
-        transcription = result["text"]
-        return transcription
-    except Exception as e:
-        return f"An error occurred during transcription: {e}"
-# --- GRADIO INTERFACE ---
-# Use gr.Blocks for more complex layouts and custom styling
-with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎙️ Professional Voice Recognition")
-    gr.Markdown(
-        "This application uses OpenAI's Whisper model to transcribe speech to text. "
-        "You can either record audio directly from your microphone or upload an audio file."
-    )
-    with gr.Row(elem_classes="audio-container"):
-        with gr.Column():
-            # Microphone input
-            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
-            # File upload input
-            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
-    # Transcribe Button
-    transcribe_button = gr.Button("Transcribe Audio")
-    # Transcription Output
-    output_text = gr.Textbox(
-        lines=10,
-        label="Transcription Result",
-        placeholder="Your transcribed text will appear here...",
-        elem_id="transcription_output"
-    )
-    # Define the action for the button click
-    transcribe_button.click(
-        fn=transcribe_audio,
-        inputs=[mic_input, file_upload],
-        outputs=output_text
-    )
-# Launch the application
-if __name__ == "__main__":
     demo.launch(debug=True)

+import gradio as gr
+import whisper
+import torch
+import time
+# --- MODEL INITIALIZATION ---
+# Check for GPU availability
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Load the Whisper model.
+# "base" is a good starting point. For higher accuracy, you can use "medium" or "large",
+# but they require more resources.
+print("Loading Whisper model...")
+model = whisper.load_model("base", device=device)
+print("Whisper model loaded successfully.")
+# --- TRANSCRIPTION FUNCTION ---
+def transcribe_audio(microphone_input, file_input):
+    """
+    Transcribes audio from either a microphone recording or an uploaded file.
+    Args:
+        microphone_input (str or None): Path to the audio file recorded from the microphone.
+        file_input (str or None): Path to the uploaded audio file.
+    Returns:
+        str: The transcribed text.
+    """
+    # Determine the input source
+    if microphone_input is not None:
+        audio_source = microphone_input
+    elif file_input is not None:
+        audio_source = file_input
+    else:
+        return "No audio source provided. Please record or upload an audio file."
+    # Perform the transcription
+    try:
+        # The transcribe function returns a dictionary with the text
+        result = model.transcribe(audio_source)
+        transcription = result["text"]
+        return transcription
+    except Exception as e:
+        return f"An error occurred during transcription: {e}"
+# --- GRADIO INTERFACE ---
+# Use gr.Blocks for more complex layouts and custom styling
+with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎙️ Voice Recognition")
+    gr.Markdown(
+        "This application uses OpenAI's Whisper model to transcribe speech to text. "
+        "You can either record audio directly from your microphone or upload an audio file."
+    )
+    with gr.Row(elem_classes="audio-container"):
+        with gr.Column():
+            # Microphone input
+            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
+            # File upload input
+            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
+    # Transcribe Button
+    transcribe_button = gr.Button("Transcribe Audio")
+    # Transcription Output
+    output_text = gr.Textbox(
+        lines=10,
+        label="Transcription Result",
+        placeholder="Your transcribed text will appear here...",
+        elem_id="transcription_output"
+    )
+    # Define the action for the button click
+    transcribe_button.click(
+        fn=transcribe_audio,
+        inputs=[mic_input, file_upload],
+        outputs=output_text
+    )
+# Launch the application
+if __name__ == "__main__":
     demo.launch(debug=True)