| |
| import os |
| import base64 |
| import gradio as gr |
| from openai import OpenAI |
|
|
| |
# Look up the OpenAI credential in the environment and report on stdout
# whether it was found. Only the first eight characters are echoed, never
# the whole key. An empty value is reported as "not set" as well.
openai_api_key = os.environ.get('OPENAI_API_KEY')

if not openai_api_key:
    print("OpenAI API Key not set")
else:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
|
|
# Chat-completion model used for the tutor's replies.
MODEL = "gpt-4o-mini"

# Shared API client used by every OpenAI call in this module
# (chat completions, text-to-speech and audio translation).
openai = OpenAI()

# System prompt that fixes the assistant's persona. Adjacent string literals
# are joined by the parser, so this is one single-line string.
system_message = (
    "You are a language tutor, and as such provide only with "
    "helpful tips and accurate translations. You are entertaining and polite. "
    "If you don't know something, you say so."
)
|
|
|
|
def talker(message):
    """Turn *message* into speech and wrap the audio in an HTML player.

    The text is sent to the text-to-speech endpoint (model ``tts-1``, voice
    ``nova``). The returned bytes are base64-encoded and embedded in an
    ``<audio>`` tag as a data URI, so no file has to be written or served.

    Args:
        message: Text to be spoken.

    Returns:
        A ``(message, audio_html)`` tuple: the unchanged input text and the
        HTML snippet with an autoplaying audio player.
    """
    speech = openai.audio.speech.create(
        input=message,
        voice="nova",
        model="tts-1",
    )

    # The URI declares audio/mp3; this assumes the endpoint's returned bytes
    # are MP3 -- TODO confirm against the API's default response format.
    encoded = base64.b64encode(speech.content).decode()
    data_uri = f"data:audio/mp3;base64,{encoded}"
    player = f'<audio autoplay controls src="{data_uri}"></audio>'

    return message, player
|
|
|
|
| |
def transcribe_audio(audio_file):
    """Send a recorded audio file to ``whisper-1`` and return the text.

    NOTE(review): despite the function name, this calls the *translations*
    endpoint, not *transcriptions*. Per OpenAI's API reference that endpoint
    is expected to return English text whatever language was spoken --
    confirm this is what the tutor should receive.

    Args:
        audio_file: Path of the audio file to read (opened in binary mode).

    Returns:
        The text produced by the API. It is also printed to stdout.
    """
    with open(audio_file, mode="rb") as recording:
        result = openai.audio.translations.create(
            file=recording,
            model="whisper-1",
        )

    text = result.text
    print(text)
    return text
|
|
|
|
| |
def process_microphone_input(audio, history=None):
    """Run one voice turn: recorded audio -> text -> chat reply -> speech.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.
        history: Optional list of earlier chat messages to pass to ``chat``.
            Defaults to a new empty list on every call. The Gradio interface
            in this file supplies only ``audio``, so each turn starts
            without prior context.

    Returns:
        Whatever ``chat`` returns for the recognised text (the reply text
        and an HTML audio player).

    Raises:
        ValueError: If ``audio`` is ``None``.
    """
    if audio is None:
        raise ValueError("No audio input detected. Please ensure the microphone is functioning correctly.")

    # Use a None sentinel instead of a literal [] default: a default list is
    # created once at definition time and shared by every call, so any
    # mutation downstream would leak state between unrelated requests.
    if history is None:
        history = []

    transcribed_text = transcribe_audio(audio)

    return chat(transcribed_text, history)
|
|
|
|
def chat(message, history):
    """Ask the chat model for a reply to *message* and have it spoken.

    The request is built from the system prompt, the supplied history and
    the new user message, in that order.

    Args:
        message: The user's latest utterance as text.
        history: List of earlier message dicts, concatenated as-is between
            the system prompt and the new user message.

    Returns:
        The result of ``talker`` for the model's reply: the reply text and
        an HTML audio player.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": message}
    messages = [system_turn] + history + [user_turn]

    completion = openai.chat.completions.create(model=MODEL, messages=messages)
    reply = completion.choices[0].message.content

    # Debug trace of what was sent, printed after the API call returns.
    for label, value in (
        ("History", history),
        ("Message", message),
        ("Messages", messages),
    ):
        print(f"{label}: {value}")

    return talker(reply)
|
|
|
|
| |
# Voice UI: one microphone recording goes in (handed to the callback as a
# file path) and two values come out -- the reply text and an HTML snippet,
# matching the pair returned by process_microphone_input.
interface = gr.Interface(
    title="Speech-to-Chatbot-to-Speech Language Tutor",
    description="Speak into the microphone to chat with GPT-4. Wait a couple of seconds before you submit your message.",
    fn=process_microphone_input,
    inputs=[gr.Audio(sources="microphone", type="filepath")],
    outputs=["text", "html"],
)


# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()
|
|