karthikmn committed on
Commit
1a76809
·
verified ·
1 Parent(s): 09723b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py CHANGED
@@ -7,6 +7,38 @@ import torchaudio
7
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
8
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Function to convert speech to text
11
  def speech_to_text(audio_file):
12
  # Load the audio file
@@ -36,3 +68,9 @@ iface = gr.Interface(
36
 
37
  # Launch the interface
38
  iface.launch()
 
 
 
 
 
 
 
7
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
8
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
9
 
10
+ # Function to convert speech to text
11
+ def speech_to_text(audio_file):
12
+ # Load the audio file
13
+ audio_input, _ = torchaudio.load(audio_file)
14
+
15
+ # Preprocess the audio input (e.g., resample, normalize, etc.)
16
+ input_values = processor(audio_input, return_tensors="pt").input_values
17
+
18
+ # Perform speech-to-text (CTC Decoding)
19
+ with torch.no_grad():
20
+ logits = model(input_values).logits
21
+ predicted_ids = torch.argmax(logits, dim=-1)
22
+
23
+ # Decode the predicted ids to text
24
+ transcription = processor.decode(predicted_ids[0])
25
+
26
+ return transcription
27
+
28
+ # Set up the Gradio interface
29
+ iface = gr.Interface(
30
+ fn=speech_to_text, # Function to be executed
31
+ inputs=gr.Audio(type="filepath"), # Correct type for file upload
32
+ outputs=gr.Textbox(), # Display transcription in a text box
33
+ title="Speech-to-Text Analyzer for Lectimport gradio as gr
34
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
35
+ import torch
36
+ import torchaudio
37
+
38
+ # Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
39
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
40
+ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
41
+
42
  # Function to convert speech to text
43
  def speech_to_text(audio_file):
44
  # Load the audio file
 
68
 
69
  # Launch the interface
70
  iface.launch()
71
+ ure Notes",
72
+ description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."
73
+ )
74
+
75
+ # Launch the interface
76
+ iface.launch()