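"""Gradio chatbot demo comparing responses from DialoGPT-large, GPT-3.5-turbo and
GPT-NeoX-20B. Input can be typed text or a microphone recording; recordings are
transcribed with the OpenAI Whisper API and the GPT-3.5 reply is spoken via pyttsx3."""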
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
import openai
import pyttsx3
import subprocess
import os
# Load the DialoGPT tokenizer and model (left padding so generation aligns correctly)
dialogpt_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large", padding_side='left')
dialogpt_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
# Load the GPT-NeoX tokenizer and model
neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
neox_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b")
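# Note: GPT-NeoX-20B is a very large checkpoint (tens of GB of weights); loading it
# like this assumes the host machine has enough memory for it.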
openai.api_key = os.getenv("OPENAI_API_KEY")
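# The OpenAI API key is read from the environment; set OPENAI_API_KEY before launching.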
messages = [
    {"role": "system", "content": "You are a teacher"}
]
def predict(model, tokenizer, input_text, history=None):
    # Encode the new user message, terminated with the end-of-sequence token
    new_user_input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt')
    if history:
        # Prepend the previous conversation ids so the model sees the full context
        bot_input_ids = torch.cat([torch.LongTensor([history]), new_user_input_ids], dim=-1)
    else:
        bot_input_ids = new_user_input_ids
    output_ids = model.generate(bot_input_ids, max_length=500, pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens and strip the EOS marker
    response = tokenizer.decode(output_ids[0][bot_input_ids.shape[-1]:])
    response = response.replace(tokenizer.eos_token, "").strip()
    return response, output_ids[0].tolist()
def transcribe(audio):
    print("Starting transcription process...")
    global messages
    input_audio = audio
    output_audio = "temp_audio.wav"
    # Delete the previous output file, if it exists
    if os.path.exists(output_audio):
        os.remove(output_audio)
    # Convert the recording to WAV with ffmpeg (argument list avoids shell quoting issues)
    subprocess.run(["ffmpeg", "-i", input_audio, output_audio], stderr=subprocess.DEVNULL)
    # Transcribe the converted audio with the OpenAI Whisper API
    with open(output_audio, "rb") as file:
        transcription = openai.Audio.transcribe("whisper-1", file)
    print("Transcription completed:", transcription)
    messages.append({"role": "user", "content": transcription["text"]})
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    AImessage = response["choices"][0]["message"]["content"]
    # Speak the assistant's reply aloud with pyttsx3
    engine = pyttsx3.init()
    print("Speaking response...")
    engine.say(AImessage)
    engine.runAndWait()
    print("Response spoken.")
    messages.append({"role": "assistant", "content": AImessage})
    # Return both the transcription and the GPT-3.5 reply so the caller can display them
    return transcription["text"], AImessage
def combined_fn(input_text, audio, mode, history_state=""):
    dialogpt_response = ""
    gpt3_response = ""
    neox_response = ""
    new_history_state = []
    # The DialoGPT history is passed between calls as a comma-separated string of token ids
    if history_state:
        history_list = list(map(int, history_state.strip().split(',')))
    else:
        history_list = []
    if mode == "text":
        # DialoGPT response for text mode
        dialogpt_response, new_history_state = predict(dialogpt_model, dialogpt_tokenizer, input_text, history_list)
        # GPT-3.5-turbo response for text mode
        messages.append({"role": "user", "content": input_text})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages
        )
        gpt3_response = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": gpt3_response})
        # GPT-NeoX response for text mode (the DialoGPT token-id history uses a different
        # vocabulary, so GPT-NeoX is given an empty history)
        neox_response, _ = predict(neox_model, neox_tokenizer, input_text, [])
    elif mode == "audio":
        # transcribe() returns the Whisper transcription and the spoken GPT-3.5 reply
        transcribed_text, gpt3_response = transcribe(audio)
        dialogpt_response, new_history_state = predict(dialogpt_model, dialogpt_tokenizer, transcribed_text, history_list)
        neox_response, _ = predict(neox_model, neox_tokenizer, transcribed_text, [])
    else:
        raise ValueError("Invalid mode selected")
    # Gradio expects one return value per output component, in order
    history_output = ",".join(map(str, new_history_state))  # Convert the list of token ids back to a string
    return dialogpt_response, gpt3_response, neox_response, history_output
inputs = [
    gr.inputs.Textbox(placeholder="Type a message as if you were texting another person."),
    gr.inputs.Audio(source='microphone', type='filepath'),
    gr.inputs.Radio(["text", "audio"], label="Mode", default="text"),
    gr.inputs.Textbox(label="history_state", default=""),
]
outputs = [
    gr.outputs.Textbox(label="DialoGPT-large Response"),
    gr.outputs.Textbox(label="GPT-3.5-turbo Response"),
    gr.outputs.Textbox(label="GPT-NeoX Response"),
    gr.outputs.Textbox(label="history_output"),
]
gr.Interface(fn=combined_fn, title="Chatbot Interface", inputs=inputs, outputs=outputs).launch()