"""Gradio chatbot demo: DialoGPT text chat plus Whisper transcription fed to
GPT-3.5-turbo, with the assistant reply spoken aloud via pyttsx3."""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
import openai
import pyttsx3
import subprocess
import os

# DialoGPT is used by predict(); left-padding matches its generation setup.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large", padding_side='left')
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Load the GPT-NeoX tokenizer and model
# NOTE(review): gpt-neox-20b is a ~40GB checkpoint; loading it eagerly at
# import time is very expensive — confirm it is actually needed.
neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
neox_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b")

openai.api_key = os.getenv("OPENAI_API_KEY")

# Shared conversation transcript for the ChatCompletion calls; mutated by
# transcribe() and by combined_fn().
messages = [
    {"role": "system", "content": "You are a teacher"}
]


def predict(input, history=None):
    """Generate one DialoGPT reply.

    Args:
        input: the user's new utterance. (Parameter name kept as ``input``,
            shadowing the builtin, so keyword callers are unaffected.)
        history: nested token-id list (``[[id, ...]]``) from a previous call,
            or None/[] for a fresh conversation.

    Returns:
        (response_text, new_history) where new_history is the full generated
        sequence as a nested list, suitable to pass back in.
    """
    # BUGFIX: was a mutable default argument (history=[]), which is shared
    # across calls and would silently accumulate state.
    if history is None:
        history = []
    new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors='pt')
    # torch.cat special-cases the empty LongTensor, so an empty history is fine.
    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
    history = model.generate(bot_input_ids, max_length=500, pad_token_id=tokenizer.eos_token_id).tolist()
    # Decode only the newly generated portion (everything after the prompt).
    response = tokenizer.decode(history[0][len(new_user_input_ids[0]):])
    response = response.replace(tokenizer.eos_token, "").strip()
    return response, history


def transcribe(audio):
    """Transcribe an audio file with Whisper, get a GPT-3.5-turbo reply,
    speak the reply aloud, and return the transcription text.

    Side effects: appends the user/assistant turns to the global ``messages``
    list, rewrites ``temp_audio.wav``, and blocks while TTS plays.
    """
    print("Starting transcription process...")
    global messages
    output_audio = "temp_audio.wav"
    # Delete the previous output file, if it exists (ffmpeg refuses to
    # overwrite without -y).
    if os.path.exists(output_audio):
        os.remove(output_audio)
    # BUGFIX/security: was f"ffmpeg -i {audio} ..." with shell=True, which
    # breaks on paths with spaces and allows shell injection via the filename.
    # List-form argv avoids the shell entirely.
    subprocess.run(["ffmpeg", "-i", audio, output_audio], stderr=subprocess.DEVNULL)
    with open(output_audio, "rb") as file:
        transcription = openai.Audio.transcribe("whisper-1", file)
    print("Transcription completed:", transcription)
    messages.append({"role": "user", "content": transcription["text"]})
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    AImessage = response["choices"][0]["message"]["content"]
    engine = pyttsx3.init()
    print("Speaking response...")
    engine.say(AImessage)
    engine.runAndWait()
    print("Response spoken.")
    messages.append({"role": "assistant", "content": AImessage})
    return transcription["text"]
def combined_fn(input_text, audio, mode, history_state=""):
    """Gradio handler: route a text or audio turn to the chat backends.

    Args:
        input_text: user message (used when mode == "text").
        audio: filepath of the recorded audio (used when mode == "audio").
        mode: "text" or "audio".
        history_state: comma-separated DialoGPT token ids from the previous
            turn, round-tripped through a hidden textbox ("" on first turn).

    Returns:
        dict of the three model responses plus the serialized history.

    Raises:
        ValueError: if mode is neither "text" nor "audio".
    """
    dialogpt_response = ""
    gpt3_response = ""
    neox_response = ""

    # BUGFIX: predict() produces/consumes a *nested* id list ([[id, ...]]),
    # so the flat "id,id,..." string must be wrapped on parse and the inner
    # sequence unwrapped on serialize; the old code joined the nested list
    # directly, which could not be parsed back on the next turn.
    if history_state:
        history_list = [list(map(int, history_state.strip().split(',')))]
    else:
        history_list = []

    if mode == "text":
        # BUGFIX: predict() has signature (text, history) and reads the
        # DialoGPT model/tokenizer from module globals; the old 4-argument
        # calls passed undefined names and raised at runtime.
        dialogpt_response, new_history_state = predict(input_text, history_list)
        # GPT-3.5-turbo response for text mode
        messages.append({"role": "user", "content": input_text})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages
        )
        gpt3_response = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": gpt3_response})
        # NOTE(review): predict() always runs DialoGPT; producing a genuine
        # GPT-NeoX answer would need a generate path that uses
        # neox_model/neox_tokenizer — confirm intent.
        neox_response, _ = predict(input_text, history_list)
    elif mode == "audio":
        transcribed_text = transcribe(audio)
        dialogpt_response, new_history_state = predict(transcribed_text, history_list)
        # transcribe() only returns the transcription; the GPT reply is
        # spoken aloud inside it, so the text is echoed here.
        gpt3_response = transcribed_text
        neox_response, _ = predict(transcribed_text, history_list)
    else:
        raise ValueError("Invalid mode selected")

    responses = {
        "DialoGPT-large Response": dialogpt_response,
        "GPT-3.5-turbo Response": gpt3_response,
        "GPT-NeoX Response": neox_response,
        # Serialize the single generated sequence back to "id,id,...".
        "history_state": ",".join(map(str, new_history_state[0])),
    }
    return responses


inputs = [
    gr.inputs.Textbox(placeholder="Write a text message as if writing a text message to a human."),
    gr.inputs.Audio(source='microphone', type='filepath'),
    gr.inputs.Radio(["text", "audio"], label="Mode", default="text"),
    gr.inputs.Textbox(label="history_state", default=""),
]
outputs = [
    gr.outputs.Textbox(label="DialoGPT-large Response"),
    gr.outputs.Textbox(label="GPT-3.5-turbo Response"),
    gr.outputs.Textbox(label="GPT-NeoX Response"),
    gr.outputs.Textbox(label="history_output"),
]
gr.Interface(fn=combined_fn, title="Chatbot Interface", inputs=inputs, outputs=outputs).launch()