Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
|
|
12   # 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
13   vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
14
15 - #
16   text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
17
18   def main():
|
|
@@ -23,7 +23,10 @@ def main():
|
|
23
24       if st.button("Get Answer"):
25           answer = vqa_pipeline(question, image_path)[0]['generated_text']
26 -
27
28           st.write("Answer:", answer)
29           st.audio(audio_data[0]["audio"], format='audio/wav')
|
|
|
|
12   # 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
13   vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
14
15 + # 加载文本到语音模型
16   text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
17
18   def main():
|
|
|
|
23
24       if st.button("Get Answer"):
25           answer = vqa_pipeline(question, image_path)[0]['generated_text']
26 +
27 +         # 将说话者的嵌入向量作为文本的一部分传递给文本到语音模型
28 +         text_with_speaker = f"{answer} Speaker Embeddings: {speaker_embeddings}"
29 +         audio_data = text_to_speech_pipeline(text_with_speaker)
30
31           st.write("Answer:", answer)
32           st.audio(audio_data[0]["audio"], format='audio/wav')
|