Spaces:
Sleeping
Sleeping
| import numpy as np | |
| def cut_audio(audio, timestamps): | |
| # Convert audio to mono if it's stereo | |
| if audio.ndim > 1: | |
| audio = np.mean(audio, axis=1) | |
| # Calculate sample rate and length of each sample | |
| sample_rate = 16000 | |
| sample_length = 1 / sample_rate | |
| # Initialize a list to store the cut audio segments | |
| cut_segments = [] | |
| # Iterate over the timestamps and cut the audio accordingly | |
| for index, timestamp in enumerate(timestamps): | |
| start_time = timestamp["start"] | |
| end_time = timestamp["end"] | |
| start_sample = int(start_time / sample_length) | |
| if index == len(timestamps) - 1: | |
| end_sample = int(end_time / sample_length) + int(1 / sample_length) | |
| else: | |
| end_sample = int(end_time / sample_length) | |
| cut_segment = audio[start_sample:end_sample] | |
| cut_segments.append(cut_segment) | |
| # Concatenate the cut audio segments | |
| final_audio = np.concatenate(cut_segments) | |
| return final_audio, sample_rate | |
def get_word_timestamps(segments):
    """Flatten transcription segments into a list of per-word timestamp dicts.

    Each element of ``segments`` is expected to expose a ``.words`` sequence
    whose items carry ``.word``, ``.start`` and ``.end`` attributes (e.g.
    faster-whisper segment objects). Returns a flat list of
    ``{'text', 'start', 'end'}`` dicts in original order.
    """
    return [
        {'text': word.word, 'start': word.start, 'end': word.end}
        for segment in segments
        for word in segment.words
    ]
def get_transcription(word_timestamps):
    """Concatenate the ``'text'`` field of every word-timestamp entry.

    Parameters
    ----------
    word_timestamps : list[dict]
        Dicts containing at least a ``'text'`` key (as produced by
        ``get_word_timestamps``).

    Returns
    -------
    str
        All word texts joined in order with no separator (word entries
        typically carry their own leading space).
    """
    # str.join instead of += in a loop: linear instead of quadratic.
    return ''.join(entry['text'] for entry in word_timestamps)
def get_modified_timestamps(word_timestamps,filtered_text):
    """Match each word of the filtered text back to a word-timestamp entry.

    For every whitespace-separated word in ``filtered_text[0]``, scan
    ``word_timestamps`` from the beginning and keep the first entry whose
    (space-stripped) text equals it, then drop the final match before
    returning.

    NOTE(review): the inner scan restarts from the start of
    ``word_timestamps`` for every word, so a word that appears more than
    once in the filtered text will always match its EARLIEST timestamp
    entry (and may append the same entry twice) — confirm this is intended.
    NOTE(review): entries of ``word_timestamps`` are mutated in place
    (spaces removed from their 'text'), which also affects the caller's list.
    """
    mod_timestemp=[]
    for h in filtered_text[0].split():
        c=0
        # Linear scan; c counts visited entries but is otherwise unused.
        for i in word_timestamps:
            c=c+1
            # Strip all spaces so tokenizer padding (' word') compares
            # equal to the plain word from the filtered text. Mutates i.
            i['text']=i['text'].replace(' ','')
            if h==i['text']:
                mod_timestemp.append(i)
                break
    # Drop the last matched entry.
    # NOTE(review): raises IndexError when nothing matched (empty list) —
    # presumably the final word is discarded on purpose; verify with callers.
    mod_timestemp.pop()
    return mod_timestemp
def filterText(text, model, tokenizer):
    """Run a seq2seq filtering model over ``text`` and return decoded output.

    ``text`` is tokenized (padded/truncated to 512 tokens), passed through
    ``model.generate`` with beam search on CPU, and the generated ids are
    decoded back to strings.

    Returns a list of decoded prediction strings (one per returned sequence;
    ``num_return_sequences=1`` here, so a single-element list).
    """
    device = 'cpu'
    model = model.to(device)

    # Encode the input as fixed-length PyTorch tensors.
    encoded = tokenizer(
        text,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors='pt',
    )
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    # Beam-search generation; penalties discourage repetition and verbosity.
    generated_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=150,
        min_new_tokens=1,
        no_repeat_ngram_size=2,
        repetition_penalty=2.0,
        length_penalty=0.5,
        num_beams=10,
        num_return_sequences=1,
    )

    return [
        tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for sequence in generated_ids
    ]