Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
-
import openai, os
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
from langchain import OpenAI
|
| 4 |
from langchain.chains import ConversationChain
|
| 5 |
from langchain.memory import ConversationSummaryBufferMemory
|
| 6 |
from langchain.chat_models import ChatOpenAI
|
| 7 |
-
from paddlespeech.cli.tts.infer import TTSExecutor
|
| 8 |
-
import pygame #pygame - 跨平台,支持更多格式的音频文件,如.wav,.mp3,.ogg等。
|
| 9 |
|
| 10 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
| 11 |
|
|
@@ -15,33 +14,92 @@ conversation = ConversationChain(
|
|
| 15 |
memory=memory,
|
| 16 |
)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def predict(input, history=[]):
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def transcribe(audio):
|
| 35 |
os.rename(audio, audio + '.wav')
|
| 36 |
audio_file = open(audio + '.wav', "rb")
|
| 37 |
-
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
| 38 |
return transcript['text']
|
| 39 |
-
|
| 40 |
def process_audio(audio, history=[]):
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
with gr.Blocks(css="#chatbot{height:
|
| 45 |
chatbot = gr.Chatbot(elem_id="chatbot")
|
| 46 |
state = gr.State([])
|
| 47 |
|
|
@@ -50,8 +108,11 @@ with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as d
|
|
| 50 |
|
| 51 |
with gr.Row():
|
| 52 |
audio = gr.Audio(source="microphone", type="filepath")
|
| 53 |
-
|
| 54 |
-
txt.submit(predict, [txt, state], [chatbot, state])
|
| 55 |
-
audio.change(process_audio, [audio, state], [chatbot, state])
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai, os, time, requests
|
| 2 |
import gradio as gr
|
| 3 |
+
from gradio import HTML
|
| 4 |
from langchain import OpenAI
|
| 5 |
from langchain.chains import ConversationChain
|
| 6 |
from langchain.memory import ConversationSummaryBufferMemory
|
| 7 |
from langchain.chat_models import ChatOpenAI
|
|
|
|
|
|
|
| 8 |
|
| 9 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
| 10 |
|
|
|
|
| 14 |
memory=memory,
|
| 15 |
)
|
| 16 |
|
| 17 |
+
avatar_url = "https://cdn.discordapp.com/attachments/1065596492796153856/1095617463112187984/John_Carmack_Potrait_668a7a8d-1bb0-427d-8655-d32517f6583d.png"
|
| 18 |
+
|
| 19 |
+
def generate_talk(input, avatar_url,
                  voice_type="microsoft",
                  voice_id="zh-CN-YunyeNeural",
                  api_key=os.environ.get('DID_API_KEY')):
    """Submit a text-to-video "talk" job to the D-ID API.

    Sends `input` as the script to be spoken by the avatar portrait at
    `avatar_url` and returns the API's parsed JSON response (which
    contains the job id used for later polling).

    NOTE(review): the api_key default is read from the environment once,
    at import time — confirm that is intended.
    """
    endpoint = "https://api.d-id.com/talks"
    request_headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + api_key,
    }
    request_body = {
        "script": {
            "type": "text",
            "provider": {
                "type": voice_type,
                "voice_id": voice_id,
            },
            "ssml": "false",
            "input": input,
        },
        "config": {
            "fluent": "false",
            "pad_audio": "0.0",
        },
        "source_url": avatar_url,
    }
    resp = requests.post(endpoint, json=request_body, headers=request_headers)
    return resp.json()
|
| 48 |
+
|
| 49 |
+
def get_a_talk(id, api_key=os.environ.get('DID_API_KEY')):
    """Fetch the current status/result of a D-ID talk job by its id.

    Returns the parsed JSON response; once server-side rendering has
    finished it contains a 'result_url' field pointing at the MP4.
    """
    resp = requests.get(
        "https://api.d-id.com/talks/" + id,
        headers={
            "accept": "application/json",
            "authorization": "Basic " + api_key,
        },
    )
    return resp.json()
|
| 57 |
+
|
| 58 |
+
# The 'result_url' field only appears once the server has finished
# rendering the whole video, so we must poll in a loop until it shows up.
def get_mp4_video(input, avatar_url=avatar_url, max_attempts=30, poll_interval=1.0):
    """Generate an avatar video speaking `input` and poll D-ID until ready.

    Parameters
    ----------
    input : str
        Text the avatar should speak.
    avatar_url : str
        Portrait image used as the talking head (module default).
    max_attempts : int
        Maximum number of status polls before giving up (default 30,
        matching the original ~30 s budget).
    poll_interval : float
        Seconds to sleep between polls (default 1.0).

    Returns
    -------
    str
        The MP4 result URL, or "" if the job did not finish in time.
    """
    response = generate_talk(input=input, avatar_url=avatar_url)
    talk_id = response['id']
    # Bug fix: the original loop slept and re-fetched the status one last
    # time but never inspected that final fetch before giving up.
    # Poll-then-check so every fetched status is actually examined.
    for _ in range(max_attempts):
        talk = get_a_talk(talk_id)
        if 'result_url' in talk:
            return talk['result_url']
        time.sleep(poll_interval)
    return ""
|
| 73 |
|
| 74 |
def predict(input, history=None):
    """Run one chat turn and render the avatar pane.

    Parameters
    ----------
    input : str or None
        The user's message; None means "no new input" (e.g. idle refresh).
    history : list or None
        Flat alternating [user, bot, user, bot, ...] transcript. Bug fix:
        the original used a mutable default (`history=[]`), which is shared
        across calls and silently accumulates state.

    Returns
    -------
    tuple
        (responses, video_html, history) where `responses` is the
        [(user, bot), ...] pairing for gr.Chatbot and `video_html` is the
        HTML for the avatar pane (video when a reply was generated, static
        portrait otherwise).
    """
    if history is None:
        history = []
    if input is None:
        # No new input: show the static portrait and the existing transcript.
        video_html = f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">'
    else:
        history.append(input)
        response = conversation.predict(input=input)
        video_url = get_mp4_video(input=response, avatar_url=avatar_url)
        video_html = f"""<video width="320" height="240" controls autoplay><source src="{video_url}" type="video/mp4"></video>"""
        history.append(response)
    # Pair (user, bot) turns for the gr.Chatbot component; an odd trailing
    # entry is dropped by zip, matching the original behavior.
    responses = [(u, b) for u, b in zip(history[::2], history[1::2])]
    return responses, video_html, history
|
| 87 |
+
|
| 88 |
def transcribe(audio):
    """Transcribe a recorded audio file to text with OpenAI Whisper.

    Gradio's microphone component saves a file without an extension;
    the Whisper API infers the format from the filename, so the file is
    renamed to '.wav' first. The Chinese prompt biases the model toward
    Simplified Chinese output.

    Returns the transcribed text.
    """
    os.rename(audio, audio + '.wav')
    # Bug fix: the original leaked the file handle; close it deterministically.
    with open(audio + '.wav', "rb") as audio_file:
        transcript = openai.Audio.transcribe(
            "whisper-1", audio_file, prompt="这是一段简体中文的问题。"
        )
    return transcript['text']
|
| 93 |
+
|
| 94 |
def process_audio(audio, history=None):
    """Transcribe microphone input (if any) and feed it to predict().

    Parameters
    ----------
    audio : str or None
        Filepath of the recording from gr.Audio, or None if nothing
        was recorded.
    history : list or None
        Chat transcript passed through to predict(). Bug fix: the
        original used a shared mutable default (`history=[]`).

    Returns
    -------
    tuple
        Whatever predict() returns: (responses, video_html, history).
    """
    if history is None:
        history = []
    # With no recording, pass None so predict() renders the idle avatar.
    text = transcribe(audio) if audio is not None else None
    return predict(text, history)
|
| 101 |
|
| 102 |
+
with gr.Blocks(css="#chatbot{height:500px} .overflow-y-auto{height:500px}") as demo:
|
| 103 |
chatbot = gr.Chatbot(elem_id="chatbot")
|
| 104 |
state = gr.State([])
|
| 105 |
|
|
|
|
| 108 |
|
| 109 |
with gr.Row():
|
| 110 |
audio = gr.Audio(source="microphone", type="filepath")
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
with gr.Row():
|
| 113 |
+
video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
|
| 114 |
+
|
| 115 |
+
txt.submit(predict, [txt, state], [chatbot, video, state])
|
| 116 |
+
audio.change(process_audio, [audio, state], [chatbot, video, state])
|
| 117 |
+
|
| 118 |
+
demo.launch()
|