yasvanthkumar committed on
Commit
06deb2d
·
verified ·
1 Parent(s): c55b141

Create utils/transcription.py

Browse files
Files changed (1) hide show
  1. utils/transcription.py +46 -0
utils/transcription.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import torch
3
+ from pathlib import Path
4
+
class TranscriptionService:
    """Speech-to-text service backed by OpenAI Whisper.

    Loads a Whisper checkpoint once at construction time and exposes
    helpers to transcribe audio files and to slice the resulting
    transcript by time range.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper model onto GPU if available, else CPU.

        Args:
            model_size: Whisper checkpoint name (e.g. "tiny", "base",
                "small", "medium", "large").
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading Whisper model on {self.device}...")
        self.model = whisper.load_model(model_size, device=self.device)

    def transcribe(self, audio_path):
        """Transcribe an audio file with word-level timestamps.

        Args:
            audio_path: Path to the audio file; str or os.PathLike
                (e.g. pathlib.Path) accepted.

        Returns:
            dict with keys:
                'text': the full transcript string,
                'segments': list of {'start', 'end', 'text', 'words'} dicts
                    with segment boundaries in seconds,
                'language': the language code ("en" is forced here).

        Raises:
            RuntimeError: if Whisper fails; the original exception is
                chained as __cause__ so the traceback is preserved.
        """
        try:
            # str() so os.PathLike inputs work too; Whisper expects a path string.
            result = self.model.transcribe(
                str(audio_path),
                word_timestamps=True,
                language="en",
            )
        except Exception as e:
            # Chain the original error instead of discarding its traceback.
            raise RuntimeError(f"Transcription failed: {e}") from e

        # Format transcript with timestamps. Kept outside the try block so
        # unexpected result shapes surface as their own errors, not as a
        # generic "Transcription failed".
        segments = [
            {
                'start': segment['start'],
                'end': segment['end'],
                'text': segment['text'].strip(),
                # 'words' may be absent for some segments; default to empty.
                'words': segment.get('words', []),
            }
            for segment in result['segments']
        ]
        return {
            'text': result['text'],
            'segments': segments,
            'language': result['language'],
        }

    def get_text_at_time(self, transcript, start_time, end_time):
        """Return transcript text fully contained in [start_time, end_time].

        Only segments lying entirely inside the window are included;
        segments straddling either boundary are skipped.

        Args:
            transcript: dict as returned by transcribe().
            start_time: window start, in seconds.
            end_time: window end, in seconds.

        Returns:
            Space-joined text of the matching segments ('' if none match).
        """
        return ' '.join(
            segment['text']
            for segment in transcript['segments']
            if segment['start'] >= start_time and segment['end'] <= end_time
        )