ForeignWhispersNYU / speech2text.py
hammamiomar
add all
da9fc8c
raw
history blame contribute delete
736 Bytes
import os, re
import whisper
from whisper.utils import get_writer
# Shared Whisper model: the "base" checkpoint is loaded once at import time
# and then moved to the CPU.
model = whisper.load_model("base")
model = model.to('cpu')
def get_file_stem(file_name):
    """Return the base name of *file_name* without its final extension.

    Only the last extension is removed, so base names that themselves
    contain dots are supported (``"talk.part1.mp4"`` -> ``"talk.part1"``).

    Args:
        file_name: A file name or path.

    Returns:
        The stem as a string, or ``None`` when the last path component is
        empty or carries no extension (this includes dot-files such as
        ``.env`` and names ending in a bare dot).
    """
    stem, extension = os.path.splitext(os.path.basename(file_name))
    # splitext yields "" for extension-less names and dot-files, and "." for
    # a bare trailing dot; neither counts as a real extension here.
    if not stem or len(extension) < 2:
        return None
    return stem
def transcribeAndSave(file, outputPath):
    """Transcribe a single media file and write the transcript as TSV.

    Args:
        file: Path of the file handed to ``model.transcribe``.
        outputPath: Directory given to the TSV writer. It is created when
            missing; an empty string is passed through unchanged.
    """
    # Create the target directory up front so a missing folder fails (or is
    # fixed) before the expensive transcription runs. os.makedirs rejects an
    # empty path, hence the guard.
    if outputPath:
        os.makedirs(outputPath, exist_ok=True)

    result = model.transcribe(file)
    writer = get_writer('tsv', outputPath)

    # get_file_stem returns None for names it cannot parse; fall back to the
    # plain base name so the writer always receives a string.
    # NOTE(review): the writer derives the output file name from this string;
    # confirm how the installed whisper version treats dots in it.
    output_name = get_file_stem(file) or os.path.basename(file)
    writer(result, output_name, None)
def transcribeAndSaveFolder(fileFolder, outputpath):
    """Transcribe every regular file located directly inside *fileFolder*.

    Args:
        fileFolder: Directory whose entries are transcribed one by one.
        outputpath: Directory that receives the transcripts; created when
            missing.
    """
    # os.makedirs rejects an empty path, so only create a named directory.
    if outputpath:
        os.makedirs(outputpath, exist_ok=True)

    # Sorted for a reproducible processing order; os.listdir order is arbitrary.
    for entry in sorted(os.listdir(fileFolder)):
        filepath = os.path.join(fileFolder, entry)
        # Sub-directories (for example an output folder nested inside the
        # input folder) are not transcribable and would abort the batch.
        if not os.path.isfile(filepath):
            continue
        transcribeAndSave(filepath, outputpath)