adityaardak committed on
Commit
6a165f5
·
verified ·
1 Parent(s): 8854453

Update scripts/transcribe.py

Browse files
Files changed (1) hide show
  1. scripts/transcribe.py +11 -26
scripts/transcribe.py CHANGED
@@ -1,31 +1,16 @@
 
1
  import torch
2
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
3
- from pydub import AudioSegment
4
- import numpy as np
5
 
6
  class SpeechToText:
7
- def __init__(self):
8
- print("Loading model...")
9
- self.processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
10
- self.model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
11
- print("Model loaded successfully.")
12
 
13
- def convert_audio(self, audio_path):
14
- print("Converting audio...")
15
- audio = AudioSegment.from_file(audio_path)
16
- audio = audio.set_channels(1).set_frame_rate(16000)
17
- samples = np.array(audio.get_array_of_samples()).astype(np.float32) # <-- fixed here
18
- print("Audio conversion complete.")
19
- return samples
20
-
21
- def transcribe(self, audio_samples):
22
- print("Starting transcription...")
23
- inputs = self.processor(audio_samples, sampling_rate=16000, return_tensors="pt", padding=True)
24
-
25
- with torch.no_grad():
26
- logits = self.model(inputs.input_values).logits
27
-
28
- predicted_ids = torch.argmax(logits, dim=-1)
29
- transcription = self.processor.decode(predicted_ids[0])
30
  print("Transcription completed.")
31
- return transcription
 
1
+ import whisper
2
  import torch
 
 
 
3
 
4
class SpeechToText:
    """Speech-to-text wrapper around an OpenAI Whisper model.

    The model is loaded once in the constructor (on CUDA when torch reports
    it as available, otherwise on CPU) and reused for every ``transcribe``
    call.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper checkpoint named *model_size*.

        Args:
            model_size: Checkpoint name forwarded to ``whisper.load_model``.
        """
        # Keep the chosen device on the instance so transcribe() can request
        # a numeric precision that the device actually supports.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading Whisper '{model_size}' model on {self.device}...")
        self.model = whisper.load_model(model_size, device=self.device)
        print("Whisper model loaded successfully.")

    def transcribe(self, audio_path):
        """Transcribe the audio at *audio_path* and return the text.

        Args:
            audio_path: Audio input forwarded unchanged to the Whisper
                model's ``transcribe`` method.

        Returns:
            The ``"text"`` entry of the Whisper result.
        """
        print("Starting transcription with Whisper...")
        # Whisper decodes in fp16 by default; on CPU that only produces a
        # warning and a fallback to fp32, so ask for fp16 only on CUDA.
        use_fp16 = self.device == "cuda"
        result = self.model.transcribe(audio_path, fp16=use_fp16)
        transcript = result["text"]
        print("Transcription completed.")
        return transcript