# speechtotext/recognizer.py
# Author: Cathaltwo — "first commit of files" (commit e29c878)
from transformers import pipeline#,SpeechT5ForTextToSpeech,SpeechT5Processor,SpeechT5HifiGan
# import torch
# from datasets import load_dataset
# import soundfile as sf
# import matplotlib.pyplot as plt
class Trans:
    """Base class for transformer-pipeline tasks.

    Tracks whether a model has been initiated before allowing predictions.
    Subclasses are expected to build ``self.pipe`` in ``initiate`` and to
    interpret the raw pipeline response in ``output``/``process``.
    """

    def __init__(self, model_name):
        # Model identifier consumed by the subclass's pipeline constructor.
        self.model_name = model_name
        # Flips to True once initiate() has been called.
        self.state = False

    def initiate(self, **kwargs):
        """Mark the model as ready; subclasses also build ``self.pipe`` here."""
        self.state = True

    def predict(self, input):
        """Run the pipeline on *input* and store the raw result in ``self.response``.

        Raises:
            Exception: if ``initiate`` has not been called first.
        """
        if not self.state:
            raise Exception('Model not initiated')
        self.response = self.pipe(input)

    def output(self):
        """Subclass hook: extract the useful value from ``self.response``."""
        pass

    def process(self):
        """Subclass hook: one-shot initiate/predict/output convenience."""
        pass
class Recognizer(Trans):
    """Speech-to-text wrapper around a transformers ASR pipeline."""

    def __init__(self, model_name):
        super().__init__(model_name)

    def initiate(self, **kwargs):
        """Build the automatic-speech-recognition pipeline and mark ready.

        Extra keyword arguments are forwarded to ``transformers.pipeline``.
        """
        self.state = True
        self.pipe = pipeline("automatic-speech-recognition", self.model_name, **kwargs)

    def output(self):
        """Return the transcribed text from the last ``predict`` call.

        Returns None (after printing a message) when ``predict`` was never
        called, keeping the original best-effort behavior.

        Raises:
            Exception: if the model was never initiated.
        """
        if not self.state:
            raise Exception('Error: Model not initiated')
        try:
            return self.response['text']
        except AttributeError:
            # self.response does not exist until predict() runs.
            print('Error: No file was transcribed')

    def process(self, input):
        """Convenience: initiate, transcribe *input* (audio file path or
        array accepted by the pipeline), and return the text."""
        self.initiate()
        self.predict(input=input)
        return self.output()
# class Speaker(Trans):
# def __init__(self,model_name):
# super().__init__(model_name)
# def initiate(self,**kwargs):
# self.state=True
# self.model=SpeechT5ForTextToSpeech.from_pretrained(self.model_name)
# self.processor = SpeechT5Processor.from_pretrained(self.model_name)
# self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
# def predict(self,text):
# if self.state:
# embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# self.speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
# inputs= self.processor(text=text, return_tensors="pt")
# self.response= self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder)
# else:
# raise Exception('Model not initiated')
# def save(self,file_name="speech.wav"):
# if self.state:
# try:
# sf.write(file_name, self.response.numpy(), samplerate=16000)
# except AttributeError:
# print('Error: No text was converted to audio')
# else:
# raise Exception('Error: Model not initiated')
# def spectogram(self):
# plt.figure()
# plt.imshow(self.response.T)
# plt.show()
# def process(self,input,file_name="speech.wav"):
# self.initiate()
# self.predict(text=input)
# self.save(file_name)
class Translator(Trans):
    """Text-translation wrapper around a transformers translation pipeline."""

    def __init__(self, model_name):
        super().__init__(model_name)

    def initiate(self, **kwargs):
        """Build the translation pipeline and mark ready.

        Extra keyword arguments are forwarded to ``transformers.pipeline``.
        """
        self.state = True
        self.pipe = pipeline("translation", self.model_name, **kwargs)

    def output(self):
        """Return the translated text from the last ``predict`` call.

        Returns None (after printing a message) when ``predict`` was never
        called, keeping the original best-effort behavior.

        Raises:
            Exception: if the model was never initiated.
        """
        if not self.state:
            raise Exception('Error: Model not initiated')
        try:
            return self.response[0]['translation_text']
        except AttributeError:
            # Fixed message: was copy-pasted from Recognizer and wrongly
            # referred to a file being transcribed.
            print('Error: No text was translated')

    def process(self, input):
        """Convenience: initiate, translate *input*, and return the text."""
        self.initiate()
        self.predict(input=input)
        return self.output()
# r=Recognizer('openai/whisper-tiny.en')
# text_eng=r.process('preamble10.wav')
# print(text_eng)
# t=Translator("Helsinki-NLP/opus-mt-en-fr")
# fre_text=t.process(text_eng)
# print(fre_text)
# s=Speaker("microsoft/speecht5_tts")
# s.process(fre_text)
# with open("output.wav", "rb") as f:
# data = f.read()
# pipe = pipeline("automatic-speech-recognition", "openai/whisper-tiny.en")
# print(pipe('preamble10.wav'))