Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
| import torch | |
| import torchaudio | |
# Pre-trained Wav2Vec 2.0 checkpoint on the Hugging Face Hub, shared by the
# processor (audio pre-processing / token decoding) and the CTC model.
_CHECKPOINT = "facebook/wav2vec2-large-960h"
processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
# Function to convert speech to text
def speech_to_text(audio_file):
    """Transcribe an audio file to text with the Wav2Vec 2.0 CTC model.

    Args:
        audio_file: Path of the audio file to transcribe, as delivered by a
            ``gr.Audio(type="filepath")`` input. A falsy value (nothing was
            uploaded or recorded) yields an empty transcription.

    Returns:
        The decoded transcription as a string.
    """
    if not audio_file:
        return ""

    # torchaudio returns a (channels, frames) tensor plus the file's sample rate.
    waveform, sample_rate = torchaudio.load(audio_file)

    # The processor expects one 1-D waveform per utterance, so mix every
    # channel down to mono instead of passing the 2-D tensor through.
    waveform = waveform.mean(dim=0)

    # Resample to the rate the feature extractor was configured for; feeding
    # audio at any other rate produces garbage transcriptions.
    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    # Normalise and batch the waveform; stating the sampling rate lets the
    # processor verify it matches what the model was trained on.
    input_values = processor(
        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0])
# Set up the Gradio interface
iface = gr.Interface(
    fn=speech_to_text,  # Function to be executed
    inputs=gr.Audio(type="filepath"),  # The function receives a path to the audio file
    outputs=gr.Textbox(),  # Display transcription in a text box
    title="Speech-to-Text Analyzer for Lecture Notes",
    description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech.",
)

# Launch the interface
iface.launch()