adityaardak committed on
Commit
1d2a7d8
·
verified ·
1 Parent(s): 29fa364

Update scripts/transcribe.py

Browse files
Files changed (1) hide show
  1. scripts/transcribe.py +31 -0
scripts/transcribe.py CHANGED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
3
+ from pydub import AudioSegment
4
+ import numpy as np
5
+
6
class SpeechToText:
    """Transcribe audio to text with a pretrained Wav2Vec2 CTC model.

    Typical use is ``transcribe(convert_audio(path))``: ``convert_audio``
    decodes a file to mono samples at ``SAMPLE_RATE`` and ``transcribe``
    runs greedy (argmax) CTC decoding on those samples.
    """

    # Sampling rate in Hz, used both for resampling in ``convert_audio`` and
    # as the rate declared to the processor in ``transcribe``.
    SAMPLE_RATE = 16000

    def __init__(self, model_name="facebook/wav2vec2-base-960h"):
        """Load the processor and the CTC model.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``
                for both the processor and the model. Defaults to the
                checkpoint this class originally hard-coded.
        """
        print("Loading model...")
        self.processor = Wav2Vec2Processor.from_pretrained(model_name)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
        print("Model loaded successfully.")

    def convert_audio(self, audio_path):
        """Decode an audio file into mono samples at ``SAMPLE_RATE``.

        Args:
            audio_path: Path (or file-like object) handed to
                ``AudioSegment.from_file``.

        Returns:
            A NumPy array built from ``get_array_of_samples()``. These are
            the decoder's raw sample values (not rescaled here);
            ``transcribe`` takes care of converting them to float32.
        """
        print("Converting audio...")
        audio = AudioSegment.from_file(audio_path)
        audio = audio.set_channels(1).set_frame_rate(self.SAMPLE_RATE)
        samples = np.array(audio.get_array_of_samples())
        print("Audio conversion complete.")
        return samples

    def transcribe(self, audio_samples):
        """Return the greedy CTC transcription of ``audio_samples``.

        Args:
            audio_samples: Sequence or array of mono samples at
                ``SAMPLE_RATE``. Integer PCM (as produced by
                ``convert_audio``) and float input are both accepted.

        Returns:
            The decoded text, or ``""`` when ``audio_samples`` is empty.
        """
        print("Starting transcription...")

        waveform = np.asarray(audio_samples)
        if waveform.size == 0:
            # Nothing to feed the model; an empty signal has no transcript.
            print("Transcription completed.")
            return ""

        if np.issubdtype(waveform.dtype, np.integer):
            # Integer PCM must become float before reaching the model: left
            # as-is it ends up as a float64 tensor after normalisation, which
            # does not match the model's float32 weights. Dividing by the
            # integer type's full scale maps the samples into [-1, 1).
            full_scale = float(np.iinfo(waveform.dtype).max) + 1.0
            waveform = waveform.astype(np.float32) / full_scale
        else:
            waveform = waveform.astype(np.float32, copy=False)

        inputs = self.processor(
            waveform,
            sampling_rate=self.SAMPLE_RATE,
            return_tensors="pt",
            padding=True,
        )

        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            logits = self.model(inputs.input_values).logits

        # Greedy decoding: most likely token per frame; the processor's
        # decode step turns the id sequence of the single batch item to text.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.processor.decode(predicted_ids[0])
        print("Transcription completed.")
        return transcription