Spaces:

NealCaren
/

TranscribeX

Runtime error

App Files Files Community

TranscribeX / app.py

NealCaren

Update app.py

2a9bc23 verified almost 2 years ago

raw

history blame contribute delete

2.41 kB

	import os
	import subprocess
	import tempfile

	import streamlit as st
	import torch
	import whisperx


	def transcribe(audio_file):
	    """Transcribe, align and speaker-label an audio file, reporting to the page.

	    Runs three WhisperX stages and writes each intermediate result to the
	    Streamlit page with ``st.write``:

	    1. batched transcription with the "tiny" Whisper model,
	    2. alignment of the transcribed segments against the audio,
	    3. speaker diarization and assignment of speaker labels.

	    The Hugging Face access token used by the diarization pipeline is read
	    from the ``HF_TOKEN`` environment variable (e.g. a Space secret) rather
	    than being embedded in the source. When the variable is unset, ``None``
	    is passed through to the pipeline.

	    Args:
	        audio_file: Path of the audio file to process.

	    Returns:
	        None. All output is written to the Streamlit page.
	    """
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    batch_size = 16  # reduce if low on GPU mem
	    compute_type = "int8"  # change to "float16" if high on GPU mem (may reduce accuracy)
	    hf_token = os.environ.get("HF_TOKEN")  # never hard-code credentials

	    # Only fall back to dumping the raw upload (module-level `uploaded_file`)
	    # when the caller did not leave a usable file at `audio_file`; otherwise
	    # the caller's prepared audio would be clobbered.
	    if not os.path.isfile(audio_file) or os.path.getsize(audio_file) == 0:
	        with open(audio_file, 'wb') as f:
	            f.write(uploaded_file.getvalue())

	    # 1. Transcribe with original whisper (batched)
	    model = whisperx.load_model("tiny", device=device, compute_type=compute_type)

	    audio = whisperx.load_audio(audio_file)
	    result = model.transcribe(audio, batch_size=batch_size)
	    st.write("Transcribed! Here's what we have so far:")
	    st.write(result["segments"])  # before alignment

	    # If GPU memory is tight, `model` can be deleted here and the CUDA cache
	    # emptied before the alignment model is loaded.

	    # 2. Align whisper output
	    model_a, metadata = whisperx.load_align_model(
	        language_code=result["language"], device=device
	    )
	    result = whisperx.align(
	        result["segments"],
	        model_a,
	        metadata,
	        audio,
	        device,
	        return_char_alignments=False,
	    )
	    st.write("Aligned! Here's what we have so far:")
	    st.write(result["segments"])  # after alignment

	    # If GPU memory is tight, `model_a` can be deleted here as well.

	    # 3. Assign speaker labels
	    diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_token, device=device)

	    # min_speakers / max_speakers can be passed here when they are known.
	    diarize_segments = diarize_model(audio_file)

	    result = whisperx.assign_word_speakers(diarize_segments, result)
	    st.write(diarize_segments)
	    st.write(result["segments"])  # segments are now assigned speaker IDs


	# Page layout: a single form with a file picker and a submit button.
	st.title("Automated Transcription")

	form = st.form(key='my_form')
	uploaded_file = form.file_uploader("Choose a file")

	submit = form.form_submit_button("Transcribe!")


	if submit:
	    if uploaded_file is None:
	        # The form can be submitted without a file; there is nothing to convert.
	        st.error("Please choose an audio file before pressing Transcribe!")
	    else:
	        # The context manager removes the scratch directory once we are done.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            # ffmpeg needs a real path, so persist the in-memory upload first.
	            # Keeping the original extension helps ffmpeg pick a demuxer.
	            extension = os.path.splitext(uploaded_file.name)[1]
	            source_file = os.path.join(tmp_dir, "upload" + extension)
	            with open(source_file, "wb") as f:
	                f.write(uploaded_file.getvalue())

	            # Normalise to 16 kHz mono 16-bit PCM WAV.
	            temp_file = os.path.join(tmp_dir, "mono.wav")
	            cmd = [
	                "ffmpeg", "-y",
	                "-i", source_file,
	                "-acodec", "pcm_s16le",
	                "-ar", "16000",
	                "-ac", "1",
	                temp_file,
	            ]
	            try:
	                # Argument list, no shell: user-influenced text is never
	                # interpreted by a shell.
	                completed = subprocess.run(cmd, capture_output=True, text=True)
	            except FileNotFoundError:
	                completed = None
	                st.error("ffmpeg is not installed or not on PATH.")

	            if completed is not None:
	                if completed.returncode != 0:
	                    st.error("ffmpeg could not convert the uploaded file.")
	                    st.code(completed.stderr[-2000:])
	                else:
	                    transcribe(temp_file)