Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
| import os | |
| import torch | |
# Checkpoint identifier shared by the processor and the CTC model below.
_MODEL_ID = 'jonatasgrosman/wav2vec2-large-xlsr-53-portuguese'

# Load the pre-trained speech recognition model and its processor.
# The processor is bound to the name `tokenizer` because the rest of the
# file refers to it by that name.
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_ID)
tokenizer = Wav2Vec2Processor.from_pretrained(_MODEL_ID)

# Recognizer instance from the `speech_recognition` package. It is not
# referenced in the portion of the file visible here; kept in case other
# code relies on it.
recognizer = sr.Recognizer()
def recognize_speech(audio_path):
    """Transcribe a recorded audio file with the module-level Wav2Vec2 model.

    Args:
        audio_path: Filesystem path of the recording to transcribe.
            ``None`` or an empty string is treated as "nothing recorded".

    Returns:
        The lower-cased transcription as a plain string, or a short
        human-readable message when no audio was supplied, the file could
        not be decoded, or nothing was recognised.
    """
    print(audio_path)
    if not audio_path:
        # Without a recording there is no path to decode; answer with a
        # message instead of letting the decoder raise on a missing file.
        return "No audio was recorded."

    # Local import keeps this block self-contained; pydub is already a
    # dependency of this file (AudioSegment comes from it).
    from pydub.exceptions import CouldntDecodeError

    try:
        clip = AudioSegment.from_file(audio_path)
    except CouldntDecodeError as e:
        return f"Could not decode the audio file: {e}"

    # Resample to the 16 kHz rate passed to the processor below, and downmix
    # to one channel: get_array_of_samples() serialises multi-channel audio
    # as interleaved samples, which would otherwise be fed to the model as
    # if it were a single (distorted) mono signal.
    clip = clip.set_frame_rate(16000).set_channels(1)
    samples = clip.get_array_of_samples()
    if len(samples) == 0:
        return "Could not understand the audio."

    x = torch.FloatTensor(samples)
    inputs = tokenizer(
        x, sampling_rate=16000, return_tensors='pt', padding='longest'
    ).input_values

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(inputs).logits

    tokens = torch.argmax(logits, dim=-1)
    text = tokenizer.batch_decode(tokens)

    # batch_decode returns one string per batch item; a single clip was
    # submitted, so return that string rather than the repr of the list.
    transcript = text[0].strip().lower() if text else ""
    if not transcript:
        return "Could not understand the audio."
    return transcript
# Microphone input component; recordings reach the callback as a file path.
# NOTE(review): `gr.inputs` looks like Gradio's legacy component namespace —
# confirm it is still available in the Gradio version this app is pinned to.
microphone_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Speak into the microphone...",
)

# Build the Gradio interface: microphone recording in, transcription text out.
audio_recognizer_interface = gr.Interface(
    fn=recognize_speech,
    inputs=microphone_input,
    outputs="text",
    title="Real-time Speech Recognition",
)

# Start serving the interface.
audio_recognizer_interface.launch()