Spaces:

karthikmn
/

smart-notes

Runtime error

App Files Files Community

karthikmn committed on Jun 7, 2025

Commit

1a76809

verified ·

1 Parent(s): 09723b9

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -0

app.py CHANGED Viewed

@@ -7,6 +7,38 @@ import torchaudio
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
 # Function to convert speech to text
 def speech_to_text(audio_file):
     # Load the audio file
@@ -36,3 +68,9 @@ iface = gr.Interface(
 # Launch the interface
 iface.launch()

 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
+# Function to convert speech to text
+def speech_to_text(audio_file):
+    """Transcribe an audio file with the module-level Wav2Vec 2.0 model.
+
+    Args:
+        audio_file: Path to an audio file readable by ``torchaudio.load``
+            (the Gradio input is declared as ``gr.Audio(type="filepath")``),
+            or ``None``/empty when nothing was uploaded.
+
+    Returns:
+        The decoded transcription, or an empty string when no file was given.
+    """
+    # Nothing uploaded: return an empty transcription instead of crashing
+    # inside torchaudio.load.
+    if not audio_file:
+        return ""
+    # Load the audio file; keep the sample rate, it is needed for resampling
+    waveform, sample_rate = torchaudio.load(audio_file)
+    # The processor expects one 1-D signal per utterance, so mix the
+    # (channels, samples) tensor down to mono instead of passing it as-is.
+    waveform = waveform.mean(dim=0)
+    # Resample to the rate the feature extractor is configured for.
+    target_rate = processor.feature_extractor.sampling_rate
+    if sample_rate != target_rate:
+        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
+    # Preprocess the audio input (normalize and batch it for the model)
+    input_values = processor(
+        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
+    ).input_values
+    # Perform speech-to-text (greedy CTC decoding: best token per frame)
+    with torch.no_grad():
+        logits = model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    # Decode the predicted ids to text
+    return processor.decode(predicted_ids[0])
+# Set up the Gradio interface
+iface = gr.Interface(
+    fn=speech_to_text,  # Function to be executed
+    inputs=gr.Audio(type="filepath"),  # Correct type for file upload
+    outputs=gr.Textbox(),  # Display transcription in a text box
+    title="Speech-to-Text Analyzer for Lectimport gradio as gr
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+import torchaudio
+# Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
 # Function to convert speech to text
 def speech_to_text(audio_file):
     # Load the audio file
 # Launch the interface
 iface.launch()
+ure Notes",
+    description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."
+)
+# Launch the interface
+iface.launch()