voice_chatbot / app.py
YAMITEK's picture
Create app.py
be06d90 verified
import streamlit as st
import os
from audio_recorder_streamlit import audio_recorder
from streamlit_float import *
import base64
from openai import OpenAI
# The OpenAI key is read from the "openapikey" environment variable; the
# lookup yields None when the variable is not set.
api_key = os.environ.get("openapikey")

# Single shared API client used by every helper below.
client = OpenAI(api_key=api_key)
def get_answer(messages):
    """Return the chat model's reply to the conversation so far.

    A fixed system prompt is placed in front of ``messages`` (a list of
    ``{"role": ..., "content": ...}`` dicts) before the request is sent.
    The caller's list is not modified.

    Returns the ``content`` of the first choice in the completion.
    """
    system_prompt = {
        "role": "system",
        "content": "You are an helpful AI chatbot, that answers questions asked by User.",
    }
    conversation = [system_prompt] + messages
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def speech_to_text(audio_data):
    """Transcribe the audio file located at path ``audio_data``.

    The file is opened in binary mode and sent to the ``whisper-1``
    transcription model with ``response_format="text"``; the API result is
    returned unchanged (presumably a plain string -- confirm against the
    OpenAI SDK version in use).
    """
    with open(audio_data, "rb") as recording:
        return client.audio.transcriptions.create(
            model="whisper-1",
            response_format="text",
            file=recording,
        )
def text_to_speech(input_text):
    """Synthesize ``input_text`` to speech and save it as an MP3 file.

    Uses the ``tts-1`` model with the ``nova`` voice.

    Returns:
        The path of the written file (``"temp_audio_play.mp3"``, relative to
        the current working directory). The caller is responsible for
        deleting it.
    """
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=input_text,
    )
    mp3_path = "temp_audio_play.mp3"
    # stream_to_file() is given the path and writes the file itself, so the
    # extra open(..., "wb") wrapper (whose handle was never used) is dropped.
    response.stream_to_file(mp3_path)
    return mp3_path
def autoplay_audio(file_path: str):
    """Embed the audio file at ``file_path`` in the page and auto-play it.

    The file's bytes are base64-encoded into a ``data:`` URI inside an
    ``<audio autoplay>`` element, so the file itself is not needed once this
    function returns.
    """
    with open(file_path, "rb") as audio_fh:
        raw_audio = audio_fh.read()
    b64 = base64.b64encode(raw_audio).decode("utf-8")
    # unsafe_allow_html is required for Streamlit to emit the raw <audio> tag.
    st.markdown(
        f"""
<audio autoplay>
<source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
</audio>
""",
        unsafe_allow_html=True,
    )
# Initialize floating features for the interface (streamlit_float). This is
# called before the footer container is floated at the end of the script.
float_init()
# Initialize session state for managing chat messages
def initialize_session_state():
    """Seed ``st.session_state.messages`` with the assistant greeting.

    Does nothing when a ``messages`` entry already exists, so the chat
    history survives Streamlit reruns.
    """
    if "messages" in st.session_state:
        return
    greeting = {"role": "assistant", "content": "Hi! How may I assist you today?"}
    st.session_state.messages = [greeting]
# Make sure the chat history exists before anything below reads it.
initialize_session_state()

st.title("OpenAI Conversational Chatbot 🤖")

# The microphone widget lives in its own container so that the container can
# be floated to the bottom of the page at the end of the script.
footer_container = st.container()
with footer_container:
    audio_bytes = audio_recorder()

# Replay the stored conversation, one chat bubble per message.
for past_message in st.session_state.messages:
    speaker = past_message["role"]
    with st.chat_message(speaker):
        st.write(past_message["content"])
if audio_bytes:
    # speech_to_text() takes a file path, so the recording is written to a
    # temporary file first.
    with st.spinner("Transcribing..."):
        webm_file_path = "temp_audio.mp3"
        try:
            with open(webm_file_path, "wb") as f:
                f.write(audio_bytes)
            transcript = speech_to_text(webm_file_path)
        finally:
            # Always delete the temporary recording, including when the
            # transcription call raises or returns an empty transcript.
            if os.path.exists(webm_file_path):
                os.remove(webm_file_path)
        if transcript:
            # Record the user's turn and show it immediately.
            st.session_state.messages.append({"role": "user", "content": transcript})
            with st.chat_message("user"):
                st.write(transcript)
# Reply only when the latest message is not already from the assistant,
# i.e. a new user message was appended during this run.
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking🤔..."):
            final_response = get_answer(st.session_state.messages)
        with st.spinner("Generating audio response..."):
            audio_file = text_to_speech(final_response)
            try:
                autoplay_audio(audio_file)
            finally:
                # autoplay_audio() embeds the audio bytes inline, so the file
                # can be deleted right away -- and is deleted even if
                # playback setup raises.
                os.remove(audio_file)
        st.write(final_response)
        st.session_state.messages.append({"role": "assistant", "content": final_response})
# Float the footer container (which holds the audio recorder widget) and
# provide the CSS that positions it: "bottom: 0rem;" places it at the bottom.
footer_container.float("bottom: 0rem;")