glt3953 committed on
Commit
cfd3099
·
1 Parent(s): b3263ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -26
app.py CHANGED
@@ -1,11 +1,10 @@
1
- import openai, os
2
  import gradio as gr
 
3
  from langchain import OpenAI
4
  from langchain.chains import ConversationChain
5
  from langchain.memory import ConversationSummaryBufferMemory
6
  from langchain.chat_models import ChatOpenAI
7
- from paddlespeech.cli.tts.infer import TTSExecutor
8
- import pygame #pygame - 跨平台,支持更多格式的音频文件,如.wav,.mp3,.ogg等。
9
 
10
  openai.api_key = os.environ["OPENAI_API_KEY"]
11
 
@@ -15,33 +14,92 @@ conversation = ConversationChain(
15
  memory=memory,
16
  )
17
 
18
- tts_executor = TTSExecutor()
19
- pygame.mixer.init()
20
- def play_voice(text):
21
- output_file = "./data/text.wav"
22
- tts_executor(text=text, output=output_file)
23
- pygame.mixer.music.load(output_file)
24
- pygame.mixer.music.play()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def predict(input, history=[]):
27
- history.append(input)
28
- response = conversation.predict(input=input)
29
- history.append(response)
30
- play_voice(response)
31
- responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
32
- return responses, history
33
-
 
 
 
 
 
 
34
  def transcribe(audio):
35
  os.rename(audio, audio + '.wav')
36
  audio_file = open(audio + '.wav', "rb")
37
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
38
  return transcript['text']
39
-
40
  def process_audio(audio, history=[]):
41
- text = transcribe(audio)
42
- return predict(text, history)
 
 
 
 
43
 
44
- with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as demo:
45
  chatbot = gr.Chatbot(elem_id="chatbot")
46
  state = gr.State([])
47
 
@@ -50,8 +108,11 @@ with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as d
50
 
51
  with gr.Row():
52
  audio = gr.Audio(source="microphone", type="filepath")
53
-
54
- txt.submit(predict, [txt, state], [chatbot, state])
55
- audio.change(process_audio, [audio, state], [chatbot, state])
56
 
57
- demo.launch()
 
 
 
 
 
 
 
1
+ import openai, os, time, requests
2
  import gradio as gr
3
+ from gradio import HTML
4
  from langchain import OpenAI
5
  from langchain.chains import ConversationChain
6
  from langchain.memory import ConversationSummaryBufferMemory
7
  from langchain.chat_models import ChatOpenAI
 
 
8
 
9
  openai.api_key = os.environ["OPENAI_API_KEY"]
10
 
 
14
  memory=memory,
15
  )
16
 
17
+ avatar_url = "https://cdn.discordapp.com/attachments/1065596492796153856/1095617463112187984/John_Carmack_Potrait_668a7a8d-1bb0-427d-8655-d32517f6583d.png"
18
+
19
def generate_talk(input, avatar_url,
                  voice_type = "microsoft",
                  voice_id = "zh-CN-YunyeNeural",
                  api_key = None):
    """Create a D-ID "talk" (a talking-head video) for the given text.

    Args:
        input: Text the avatar should speak.
        avatar_url: URL of the source portrait image.
        voice_type: TTS provider name sent as the D-ID "provider.type".
        voice_id: Provider-specific voice identifier.
        api_key: D-ID API key. Defaults to the DID_API_KEY environment
            variable, resolved at call time.

    Returns:
        The parsed JSON response of POST https://api.d-id.com/talks
        (on success it contains the new talk's "id").

    Raises:
        RuntimeError: If no API key is available.
    """
    if api_key is None:
        # Resolve the key per call, not as a def-time default: a default of
        # os.environ.get(...) freezes whatever the variable held at import
        # time, and a missing key would later crash as "Basic " + None.
        api_key = os.environ.get('DID_API_KEY')
    if not api_key:
        raise RuntimeError("DID_API_KEY is not set")

    url = "https://api.d-id.com/talks"
    payload = {
        "script": {
            "type": "text",
            "provider": {
                "type": voice_type,
                "voice_id": voice_id
            },
            "ssml": "false",
            "input": input
        },
        "config": {
            "fluent": "false",
            "pad_audio": "0.0"
        },
        "source_url": avatar_url
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + api_key
    }

    response = requests.post(url, json=payload, headers=headers)
    return response.json()
48
+
49
def get_a_talk(id, api_key = None):
    """Fetch the current state of a D-ID talk by its id.

    Args:
        id: Talk id previously returned by generate_talk().
        api_key: D-ID API key. Defaults to the DID_API_KEY environment
            variable, resolved at call time.

    Returns:
        The parsed JSON response of GET https://api.d-id.com/talks/<id>;
        once rendering finishes it includes a "result_url" field.

    Raises:
        RuntimeError: If no API key is available.
    """
    if api_key is None:
        # Same fix as generate_talk: avoid freezing the env var at import
        # time and avoid "Basic " + None when the key is missing.
        api_key = os.environ.get('DID_API_KEY')
    if not api_key:
        raise RuntimeError("DID_API_KEY is not set")

    url = "https://api.d-id.com/talks/" + id
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + api_key
    }
    response = requests.get(url, headers=headers)
    return response.json()
57
+
58
# The server fills in "result_url" only after the whole video has been
# rendered, so we have to poll the talk until it appears.
def get_mp4_video(input, avatar_url=avatar_url):
    """Generate a talking-head video for *input* and return its URL.

    Submits the text via generate_talk(), then polls get_a_talk() roughly
    once per second for up to 30 attempts.

    Returns:
        The "result_url" of the finished video, or "" if it did not
        become available within the polling window.
    """
    response = generate_talk(input=input, avatar_url=avatar_url)
    talk_id = response['id']
    # Fetch-then-check on every iteration: the previous version slept,
    # refetched, and then left the loop without inspecting the final
    # fetch, silently discarding the last poll's result.
    for _ in range(30):
        talk = get_a_talk(talk_id)
        if 'result_url' in talk:
            return talk['result_url']
        time.sleep(1)
    return ""
73
 
74
def predict(input, history=None):
    """Run one chat turn and build the avatar video for the reply.

    Args:
        input: The user's message, or None when there is nothing to say
            (e.g. the microphone produced no transcript).
        history: Flat list alternating [user, bot, user, bot, ...];
            mutated in place and also returned as Gradio state.

    Returns:
        (responses, video_html, history) where responses is a list of
        (user, bot) pairs for the Chatbot component and video_html is the
        HTML to show in the video pane.
    """
    # None sentinel instead of a mutable [] default: a literal default is
    # shared across calls and would leak history between sessions.
    if history is None:
        history = []
    if input is not None:
        history.append(input)
        response = conversation.predict(input=input)
        history.append(response)
        video_url = get_mp4_video(input=response, avatar_url=avatar_url)
        video_html = f"""<video width="320" height="240" controls autoplay><source src="{video_url}" type="video/mp4"></video>"""
    else:
        # No input: keep showing the static portrait instead of a video.
        video_html = f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">'
    # Pair up (user, bot) turns for the Chatbot component — computed once
    # here instead of duplicated in both branches.
    responses = [(u, b) for u, b in zip(history[::2], history[1::2])]
    return responses, video_html, history
87
+
88
def transcribe(audio):
    """Transcribe a recorded audio file with OpenAI Whisper.

    Args:
        audio: Filesystem path handed over by Gradio's microphone
            component (a temp file without an extension).

    Returns:
        The transcribed text.
    """
    # Whisper needs a recognized suffix, so give the temp file one.
    wav_path = audio + '.wav'
    os.rename(audio, wav_path)
    # Context manager closes the handle deterministically instead of
    # leaking it as the previous bare open() did.
    with open(wav_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe(
            "whisper-1", audio_file, prompt="这是一段简体中文的问题。")
    return transcript['text']
93
+
94
def process_audio(audio, history=[]):
    """Transcribe microphone input (if any) and feed it to predict().

    Args:
        audio: Path to the recorded clip, or None if nothing was recorded.
        history: Chat history state forwarded unchanged to predict().

    Returns:
        Whatever predict() returns: (responses, video_html, history).
    """
    # Both branches of the original if/else made the identical call and
    # differed only in the text argument — collapse to one expression.
    text = transcribe(audio) if audio is not None else None
    return predict(text, history)
101
 
102
+ with gr.Blocks(css="#chatbot{height:500px} .overflow-y-auto{height:500px}") as demo:
103
  chatbot = gr.Chatbot(elem_id="chatbot")
104
  state = gr.State([])
105
 
 
108
 
109
  with gr.Row():
110
  audio = gr.Audio(source="microphone", type="filepath")
 
 
 
111
 
112
+ with gr.Row():
113
+ video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
114
+
115
+ txt.submit(predict, [txt, state], [chatbot, video, state])
116
+ audio.change(process_audio, [audio, state], [chatbot, video, state])
117
+
118
+ demo.launch()