# iarfmoose3's picture
# Update app.py
# 3384e1f verified
# Module setup: loads the speech models and builds the chat clients once, at
# import time, so the request handlers below can reuse them.
import os
# Set before the Coqui TTS model below is created; presumably this accepts the
# Coqui terms of service non-interactively -- TODO confirm against Coqui docs.
os.environ["COQUI_TOS_AGREED"] = "1"
from faster_whisper import WhisperModel
# NOTE(review): this Whisper "large-v3" model is loaded here but is not
# referenced anywhere else in the visible code.
model = WhisperModel("large-v3")
from TTS.api import TTS
# XTTS v2 text-to-speech model used by generate_response(); gpu=True is
# hard-coded here.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
# from IPython.display import Audio, display
import gradio as gr
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from together import Together
from pydub import AudioSegment
from time import sleep
import torchaudio
# Conversation buffer; the visible code only appends messages to it through
# conversation.memory.chat_memory and never reads it back.
memory = ConversationBufferMemory()
# A single key read from the API_KEY environment variable.
# NOTE(review): the same key is handed to both ChatOpenAI and Together below --
# confirm that is intended.
api_k = os.getenv("API_KEY")
llm = ChatOpenAI(api_key=api_k)
# Chain object that owns `memory`; the visible code uses it only as a holder
# for the chat history, not to generate replies.
conversation = ConversationChain(
llm=llm,
memory=memory
)
# NOTE(review): `i` is not referenced anywhere else in the visible code.
i=0
# Together client that generate_response() uses for chat completions.
client = Together(api_key=api_k)
# Persona instruction sent ahead of every user message.
_PERSONA_PROMPT = (
    "Pretend you're Tommy Vercetti from GTA Vice City. "
    "Talk like him—confident, street-smart, with a tough attitude. "
    "Respond to everything I say like you're running your criminal empire in Vice City. "
    "Only respond no need for actions"
)
_CHAT_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
# Speaker tag the model sometimes prepends; removed before synthesis.
_SPEAKER_PREFIX = "Tommy Vercetti:"
# Reference clip whose voice the TTS model imitates.
_REFERENCE_VOICE = "./ref1.wav"
# NOTE(review): every request writes to this same file, so overlapping
# requests would overwrite each other's audio.
_OUTPUT_WAV = "./output.wav"


def generate_response(user_input: str):
    """Get an in-character reply for *user_input* and synthesize it to speech.

    The persona prompt and the user's message are sent to the Together chat
    completion endpoint. The exchange is appended to the shared conversation
    memory, and the reply (with any leading speaker tag removed) is rendered
    to a WAV file with the module-level TTS model.

    Args:
        user_input: The text typed by the user.

    Returns:
        A ``(text_response, audio_path)`` tuple. ``audio_path`` is the path of
        the generated WAV file, or ``None`` when there is nothing to speak
        (blank input or an empty completion). For blank input the text is
        ``""`` and no API call is made.

    NOTE(review): the conversation memory is written to but is not included
    in the request, so each reply is generated without earlier turns.
    """
    # Blank input: skip the API and TTS calls entirely.
    if not user_input or not user_input.strip():
        return "", None

    response = client.chat.completions.create(
        model=_CHAT_MODEL,
        messages=[
            # The persona is an instruction, so it goes in the system role
            # rather than being passed off as something the user said.
            {"role": "system", "content": _PERSONA_PROMPT},
            {"role": "user", "content": user_input},
        ],
    )
    # The API may return no content; normalise to an empty string so the
    # string handling below cannot fail on None.
    text_response = response.choices[0].message.content or ""

    # Record the exchange only after the completion succeeded, so a failed
    # request does not leave a user message without a reply in memory.
    chat_memory = conversation.memory.chat_memory
    chat_memory.add_user_message(user_input)
    chat_memory.add_ai_message(text_response)

    speak = text_response.replace(_SPEAKER_PREFIX, "").strip()
    if not speak:
        # Nothing to synthesize; return the text without audio.
        return text_response, None

    tts.tts_to_file(
        text=speak,
        file_path=_OUTPUT_WAV,
        speaker_wav=[_REFERENCE_VOICE],
        language="en",
        split_sentences=True,
    )
    return text_response, _OUTPUT_WAV
def respond(user_input):
    """Gradio callback: forward *user_input* to ``generate_response``.

    Returns the two values produced by ``generate_response`` as a tuple, in
    the order the interface's output components expect (text, then audio).
    """
    reply_text, audio_file = generate_response(user_input)
    return (reply_text, audio_file)
# Output widgets, in the order respond() returns its values: text, then audio.
_output_components = [
    gr.Text(label="Transcription"),
    gr.Audio(label="Tommy's Thoughts", autoplay=True),
]

_description = "🔊 Engage in a dynamic conversation simulation with the character Tommy Vercetti from the video game Grand Theft Auto: Vice City 🕶️. The interaction provides text-based responses 💬 and corresponding voice outputs 🔉, simulating natural language processing and speech synthesis techniques 🤖. This exercise leverages advanced AI-driven dialogue systems to create a more immersive experience 🎮."

# Build the single-textbox UI around respond() and start serving it.
demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs=_output_components,
    title="Tommy Vercetti",
    description=_description,
)
demo.launch(debug=True)