# prjt-nvd / app.py
# ANASDAVOODTK's picture
# Update app.py
# b1bef29
# -*- coding: utf-8 -*-
"""AD.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/17QP7dk4lEklxpGBT704p1j_yUo1U9EuJ
"""
import gradio as gr
import openai
from elevenlabslib import *
from pydub import AudioSegment
from pydub.playback import play
import io
import config
import os
import librosa
import IPython.display as ipd
from elevenlabslib import ElevenLabsUser

# NOTE(review): runtime installs via os.system() are a Hugging Face Spaces
# hack; prefer packages.txt / requirements.txt so the environment is
# reproducible and startup is faster.
os.system("sudo apt-get install portaudio19-dev")
os.system("pip install whisper openai elevenlabslib pydub pyaudio gradio config")

# OpenAI key comes from the deployment environment; raises KeyError if unset.
openai.api_key = os.environ['openapi']

# SECURITY: the ElevenLabs key was hard-coded in source. Prefer the
# ELEVENLABS_API_KEY environment variable; fall back to the original value so
# existing deployments keep working. Rotate this key -- it has been committed
# to a public repository.
user = ElevenLabsUser(
    os.environ.get("ELEVENLABS_API_KEY", "b73020f58fc303e3c343782b85cf7138")
)

# Conversation "history" as plain strings (not chat-role dicts). The first
# entry is the advisor instruction; transcribe() appends user/model turns.
messages = ["You are an advisor. Please respond to all input in 50 words or less."]
def transcribe(audio, voice_name):
    """Transcribe a recording, get a GPT-4 reply, and return it as speech.

    Parameters
    ----------
    audio : str
        Filepath of the recorded clip (Gradio microphone component).
    voice_name : str
        Name of the ElevenLabs voice used to speak the reply.

    Returns
    -------
    str
        Path to the generated ``output.wav`` file for the Gradio audio output.
    """
    global messages

    # Use a context manager so the file handle is closed deterministically
    # (the original leaked it).
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    messages.append(f"\nUser: {transcript['text']}")

    # NOTE(review): only the newest entry is sent, so earlier turns in
    # `messages` (including the advisor instruction) never reach the model --
    # confirm whether the full history should be forwarded instead.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": f"{messages[-1]}"}],
    )
    system_message = response["choices"][0]["message"]["content"]
    print(system_message)
    messages.append(f"{system_message}")

    # Synthesize the reply with ElevenLabs (MP3 bytes), then convert to WAV
    # because the Gradio output component plays the exported file.
    voice = user.get_voices_by_name(voice_name)[0]
    speech_bytes = voice.generate_audio_bytes(system_message)
    segment = AudioSegment.from_file(io.BytesIO(speech_bytes), format="mp3")
    segment.export("output.wav", format="wav")
    return "output.wav"
# Wire the pipeline into a simple Gradio UI: record from the microphone,
# type an ElevenLabs voice name, and play back the generated WAV reply.
mic_input = gr.Audio(source="microphone", type="filepath")
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic_input, "text"],
    outputs=[gr.Audio()],
)

# share=True exposes a temporary public URL; debug=True keeps errors visible
# in the console while the app runs.
iface.launch(share=True, debug=True)