The full pipeline: a Portuguese fine-tune of Whisper for transcription, Llama 3.1 served by Groq for the answer, and Facebook's Portuguese MMS-TTS model for speech synthesis.

```python
from transformers import pipeline, VitsModel, AutoTokenizer
import torch
from groq import Groq

# Transcription model (Whisper small, fine-tuned for Portuguese)
transcriber = pipeline("automatic-speech-recognition", model="SamuelM0422/whisper-small-pt")

# Synthesis model (MMS TTS, Portuguese)
model = VitsModel.from_pretrained("facebook/mms-tts-por")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-por")

# LLM query function
def query(text, groq_api_key):
    client = Groq(api_key=groq_api_key)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Answer the following question concisely and objectively. If there are numbers in the response, WRITE THEM IN WORDS.",
            },
            {
                "role": "user",
                "content": text,
            },
        ],
        model="llama-3.1-8b-instant",
    )
    return chat_completion.choices[0].message.content

# Synthesis function
def synthesise(text):
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs).waveform
    return output.cpu()

# Piecing it all together: speech in -> text -> LLM answer -> speech out
def ai_assistant(filepath, groq_key):
    transcription = transcriber(filepath)
    response = query(transcription["text"], groq_key)
    audio_response = synthesise(response)
    # MMS TTS generates 16 kHz audio
    return (16000, audio_response.squeeze().cpu().numpy()), response
```
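`ai_assistant` returns a `(sample_rate, numpy_array)` tuple plus the text answer, which is exactly the shape a Gradio `gr.Audio` output component accepts. A minimal sketch of how the function could be wired into a Space's UI — the component choices and labels below are assumptions for illustration, not the original app's code:

```python
# Hypothetical Gradio wiring (assumes Gradio 4.x); illustrative only,
# not the original Space's layout.
import gradio as gr

demo = gr.Interface(
    fn=ai_assistant,
    inputs=[
        # Recorded audio is passed to ai_assistant as a file path
        gr.Audio(sources=["microphone"], type="filepath", label="Ask your question"),
        gr.Textbox(type="password", label="Groq API key"),
    ],
    outputs=[
        # Consumes the (16000, waveform) tuple returned by ai_assistant
        gr.Audio(label="Spoken answer"),
        gr.Textbox(label="Text answer"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```

Taking the API key as an input component keeps it out of the repository; alternatively, a Space would typically read it once from a secret (e.g. an environment variable) instead of asking the user on every call.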