Spaces:
Runtime error
| from transformers import pipeline#,SpeechT5ForTextToSpeech,SpeechT5Processor,SpeechT5HifiGan | |
| # import torch | |
| # from datasets import load_dataset | |
| # import soundfile as sf | |
| # import matplotlib.pyplot as plt | |
class Trans:
    """Base wrapper for a Hugging Face pipeline-style model.

    Subclasses override initiate() to build self.pipe and output() to
    extract the useful part of the raw response.
    """

    def __init__(self, model_name):
        # Model name/path only; actual loading is deferred to initiate().
        self.model_name = model_name
        self.state = False  # flips to True once initiate() has run

    def initiate(self, **kwargs):
        # Base version only marks the model as ready; subclasses build self.pipe.
        self.state = True

    def predict(self, input):
        # Run the underlying pipeline and stash the raw result on self.response.
        if not self.state:
            raise Exception('Model not initiated')
        self.response = self.pipe(input)

    def output(self):
        # Hook: subclasses pull the relevant field out of self.response.
        pass

    def process(self):
        # Hook: subclasses chain initiate -> predict -> output.
        pass
class Recognizer(Trans):
    """Speech-to-text wrapper: builds an ASR pipeline and returns the transcript."""

    def __init__(self, model_name):
        super().__init__(model_name)

    def initiate(self, **kwargs):
        # Mark ready and build the ASR pipeline; kwargs pass straight to transformers.
        self.state = True
        self.pipe = pipeline("automatic-speech-recognition", self.model_name, **kwargs)

    def output(self):
        # Return the transcript, or print a message when nothing was transcribed yet.
        if not self.state:
            raise Exception('Error: Model not initiated')
        try:
            return self.response['text']
        except AttributeError:
            print('Error: No file was transcribed')

    def process(self, input):
        # Convenience path: load the model, transcribe the input, return the text.
        self.initiate()
        self.predict(input=input)
        return self.output()
| # class Speaker(Trans): | |
| # def __init__(self,model_name): | |
| # super().__init__(model_name) | |
| # def initiate(self,**kwargs): | |
| # self.state=True | |
| # self.model=SpeechT5ForTextToSpeech.from_pretrained(self.model_name) | |
| # self.processor = SpeechT5Processor.from_pretrained(self.model_name) | |
| # self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") | |
| # def predict(self,text): | |
| # if self.state: | |
| # embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation") | |
| # self.speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0) | |
| # inputs= self.processor(text=text, return_tensors="pt") | |
| # self.response= self.model.generate_speech(inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder) | |
| # else: | |
| # raise Exception('Model not initiated') | |
| # def save(self,file_name="speech.wav"): | |
| # if self.state: | |
| # try: | |
| # sf.write(file_name, self.response.numpy(), samplerate=16000) | |
| # except AttributeError: | |
| # print('Error: No text was converted to audio') | |
| # else: | |
| # raise Exception('Error: Model not initiated') | |
| # def spectogram(self): | |
| # plt.figure() | |
| # plt.imshow(self.response.T) | |
| # plt.show() | |
| # def process(self,input,file_name="speech.wav"): | |
| # self.initiate() | |
| # self.predict(text=input) | |
| # self.save(file_name) | |
class Translator(Trans):
    """Text translation wrapper around a transformers translation pipeline."""

    def __init__(self, model_name):
        super().__init__(model_name)

    def initiate(self, **kwargs):
        # Mark ready and build the translation pipeline; kwargs are forwarded
        # to transformers.pipeline (e.g. device selection).
        self.state = True
        self.pipe = pipeline("translation", self.model_name, **kwargs)

    def output(self):
        """Return the translated string, or None (with a message) if predict() never ran.

        Raises:
            Exception: if the model was never initiated.
        """
        if self.state:
            try:
                return self.response[0]['translation_text']
            except AttributeError:
                # Bug fix: message was copy-pasted from Recognizer ("No file was
                # transcribed") — this class translates text, it never transcribes.
                print('Error: No text was translated')
        else:
            raise Exception('Error: Model not initiated')

    def process(self, input):
        # Convenience path: load the model, translate the input, return the text.
        self.initiate()
        self.predict(input=input)
        return self.output()
| # r=Recognizer('openai/whisper-tiny.en') | |
| # text_eng=r.process('preamble10.wav') | |
| # print(text_eng) | |
| # t=Translator("Helsinki-NLP/opus-mt-en-fr") | |
| # fre_text=t.process(text_eng) | |
| # print(fre_text) | |
| # s=Speaker("microsoft/speecht5_tts") | |
| # s.process(fre_text) | |
| # with open("output.wav", "rb") as f: | |
| # data = f.read() | |
| # pipe = pipeline("automatic-speech-recognition", "openai/whisper-tiny.en") | |
| # print(pipe('preamble10.wav')) | |