|
|
import base64
import os

import gradio as gr
import requests
from openai import OpenAI

# The API key is read from the "OpenAI" environment variable; os.getenv
# yields None when that variable is not set.
OpenAI1 = os.getenv("OpenAI")

# Module-level client shared by every call to process_voice_input.
client = OpenAI(api_key=OpenAI1)
|
|
|
|
|
|
|
|
def process_voice_input(input_audio) -> str:
    """Send a recorded audio file to the audio chat model and save its spoken reply.

    Args:
        input_audio: Filesystem path of the audio recording, or None when
            no recording was provided.

    Returns:
        The path of a newly created WAV file holding the model's spoken
        answer, or a plain error message when ``input_audio`` is None or
        does not point to an existing file.

    Raises:
        RuntimeError: If the completion comes back without audio data.
    """
    import tempfile  # local import: only needed to create the reply file

    if input_audio is None or not os.path.exists(input_audio):
        # NOTE(review): the caller wires this function to an audio output, so
        # this message is presumably not rendered as text there; raising
        # gr.Error may be the better signal -- confirm before changing the
        # return contract.
        return "No audio file received or the file path is invalid."

    with open(input_audio, "rb") as audio_file:
        encoded_string = base64.b64encode(audio_file.read()).decode("utf-8")

    # Label the payload according to the file extension instead of always
    # claiming WAV, so an .mp3 file is not mislabelled. Anything that is
    # not .mp3 keeps the previous "wav" label.
    extension = os.path.splitext(input_audio)[1].lower()
    input_format = "mp3" if extension == ".mp3" else "wav"

    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What is in this recording?",
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": encoded_string,
                            "format": input_format,
                        },
                    },
                ],
            }
        ],
    )

    reply_audio = completion.choices[0].message.audio
    if reply_audio is None:
        # Fail with a clear message instead of an AttributeError on `.data`.
        raise RuntimeError("The model response did not contain any audio.")

    wav_bytes = base64.b64decode(reply_audio.data)

    # Write each reply to its own temporary file so that separate calls
    # never overwrite one another's output (a fixed "response.wav" in the
    # working directory was shared by every request). delete=False keeps
    # the file on disk after the handle is closed so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reply_file:
        reply_file.write(wav_bytes)

    return reply_file.name
|
|
|
|
|
|
|
|
# Web UI definition: a single audio input and a single audio output, both
# configured with type="filepath", routed through process_voice_input.
iface = gr.Interface(
    title="Voice Chat with GPT-4",
    description="Record a voice message and get a voice response.",
    fn=process_voice_input,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
)

# Start serving the interface.
iface.launch()
|
|
|