Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, WhisperProcessor, WhisperForConditionalGeneration | |
| from gtts import gTTS | |
| import os | |
| class InteractiveChat: | |
| def __init__(self): | |
| self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-large") | |
| self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large") | |
| self.zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta") | |
| self.zephyr_model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto") | |
| def generate_response(self, input_data): | |
| input_features = self.whisper_processor(input_data) | |
| predicted_ids = self.whisper_model.generate(input_features) | |
| transcription = self.whisper_processor.batch_decode(predicted_ids) | |
| response = self.get_zephyr_response(transcription) | |
| self.speak(response) | |
| return response | |
| def get_zephyr_response(self, transcription): | |
| zephyr_pipeline = pipeline("text-generation") | |
| response = zephyr_pipeline(transcription)[0]["generated_text"] | |
| return response | |
| def speak(self, text): | |
| tts = gTTS(text=text, lang='en') | |
| tts.save("output.mp3") | |
| os.system("mpg321 output.mp3") | |
| # Create an instance of the InteractiveChat class | |
| chat = InteractiveChat() | |
| # Define a function that wraps the generate_response method | |
| def generate_response_fn(input_data): | |
| return chat.generate_response(input_data) | |
| # Use the function in gr.Interface | |
| interface = gr.Interface( | |
| gr.Audio(type="filepath"), # Accept audio files | |
| gr.Textbox(), | |
| generate_response_fn # Pass the function here | |
| ) | |
| interface.launch() | |