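"""Gradio Space: speech-to-text transcription with OpenAI's Whisper model.

Users can either record audio from the microphone or upload an audio file;
the app returns the transcribed text.
"""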
import gradio as gr
import torch
import whisper
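
# NOTE (assumed setup, not visible in this file): the Space's requirements.txt
# is expected to list gradio, torch, and openai-whisper. Whisper also needs the
# ffmpeg binary on the system (on Spaces, typically via packages.txt) to decode
# audio files.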

# --- MODEL INITIALIZATION ---

# Check for GPU availability.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Whisper model. "base" is a good starting point; for higher accuracy,
# use "medium" or "large", but they require more memory and compute.
print("Loading Whisper model...")
model = whisper.load_model("base", device=device)
print("Whisper model loaded successfully.")


# --- TRANSCRIPTION FUNCTION ---

def transcribe_audio(microphone_input, file_input):
    """Transcribe audio from either a microphone recording or an uploaded file.

    Both Audio components are configured with type="filepath", so each argument
    is the path to a temporary audio file, or None if that input was not used.

    Args:
        microphone_input (str or None): Path to the recorded audio file.
        file_input (str or None): Path to the uploaded audio file.

    Returns:
        str: The transcribed text, or an error message.
    """
    # Determine the input source, preferring the microphone recording.
    if microphone_input is not None:
        audio_source = microphone_input
    elif file_input is not None:
        audio_source = file_input
    else:
        return "No audio source provided. Please record or upload an audio file."

    # Perform the transcription. Whisper splits long audio into 30-second
    # windows internally, so long files need no special handling. fp16 is only
    # supported on GPU; disabling it on CPU avoids a runtime warning.
    try:
        result = model.transcribe(audio_source, fp16=(device == "cuda"))
        return result["text"]
    except Exception as e:
        return f"An error occurred during transcription: {e}"


# --- GRADIO INTERFACE ---

# Use gr.Blocks for more complex layouts and custom styling.
with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Voice Recognition")
    gr.Markdown(
        "This application uses OpenAI's Whisper model to transcribe speech to text. "
        "You can either record audio directly from your microphone or upload an audio file."
    )

    with gr.Row(elem_classes="audio-container"):
        with gr.Column():
            # Microphone input
            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
            # File upload input
            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")

    # Transcribe button
    transcribe_button = gr.Button("Transcribe Audio")

    # Transcription output
    output_text = gr.Textbox(
        lines=10,
        label="Transcription Result",
        placeholder="Your transcribed text will appear here...",
        elem_id="transcription_output",
    )

    # Wire the button click to the transcription function.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[mic_input, file_upload],
        outputs=output_text,
    )

# Launch the application.
if __name__ == "__main__":
    demo.launch(debug=True)
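
# When running locally, demo.launch(share=True) would additionally create a
# temporary public link; on Hugging Face Spaces this launch() call is picked up
# automatically and no extra arguments are needed.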