# Source: Hugging Face file view, uploaded by aka7774.
# Commit 8d16fde (verified): "Upload 3 files".
# File size: 985 bytes.
import torch
import whisper
# Module-level state shared by the functions in this file.
# `model` holds the object returned by whisper.load_model and `model_size`
# the size name it was loaded with; both stay None until a load happens.
model = model_size = None
# Prompt text forwarded to model.transcribe(); changed through set_prompt().
initial_prompt = None
def load_model(_model_size):
    """Load the Whisper model of the requested size into the module cache.

    The call does nothing when ``_model_size`` is falsy, or when a model of
    that size is already cached. The model is placed on ``"cuda"`` when
    ``torch.cuda.is_available()`` is true and on ``"cpu"`` otherwise.

    Args:
        _model_size: Model name handed to ``whisper.load_model``.

    Raises:
        Whatever ``whisper.load_model`` raises. In that case the previously
        cached ``model`` and ``model_size`` are left untouched.
    """
    global model_size, model

    if not _model_size:
        return
    if model is not None and model_size == _model_size:
        return

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Load into a local first and publish both globals together: if loading
    # fails, the cache must not claim a size that was never actually loaded,
    # otherwise a retry with the same size would be skipped.
    loaded = whisper.load_model(_model_size, device=device)
    model, model_size = loaded, _model_size
def set_prompt(prompt):
    """Store ``prompt`` in the module-level ``initial_prompt``.

    ``speech_to_text`` passes the stored value to ``model.transcribe`` as
    its ``initial_prompt`` argument.

    Args:
        prompt: The value to store; it is kept as given, without validation.
    """
    global initial_prompt
    initial_prompt = prompt
def speech_to_text(audio_file, _model_size = None):
    """Transcribe ``audio_file`` with the cached Whisper model.

    ``load_model(_model_size)`` is called first, so passing a size loads or
    switches the model; omitting it reuses whatever is already cached.
    Transcription uses the stored ``initial_prompt``, ``language='ja'`` and
    ``beam_size=5``.

    Args:
        audio_file: Audio input passed straight to ``model.transcribe``.
        _model_size: Optional model name forwarded to ``load_model``.

    Returns:
        A ``(text, text_with_timestamps)`` tuple. ``text`` is the result's
        ``'text'`` entry. ``text_with_timestamps`` has one line per segment
        in the form ``"<start>\\t<end>\\t<text>\\n"``, with start and end
        formatted to two decimals.

    Raises:
        RuntimeError: If no model is loaded and no ``_model_size`` was given.
    """
    load_model(_model_size)

    # Without a requested size load_model is a no-op, so the very first call
    # can arrive here with nothing loaded; fail with a clear message rather
    # than an AttributeError on None.
    if model is None:
        raise RuntimeError(
            "No Whisper model is loaded; pass _model_size or call "
            "load_model() first."
        )

    res = model.transcribe(
        audio_file,
        initial_prompt=initial_prompt,
        language='ja',
        beam_size=5,
        without_timestamps=False,
    )

    text_with_timestamps = ''.join(
        f"{segment['start']:.2f}\t{segment['end']:.2f}\t{segment['text']}\n"
        for segment in res['segments']
    )

    return res['text'], text_with_timestamps