Commit 8ef0678
Parent(s): 51c83e1
add yi back
app.py CHANGED
@@ -95,8 +95,7 @@ repo_id = "coqui/voice-chat-with-zephyr"
 
 default_system_message = f"""
 You are ##LLM_MODEL###, a large language model trained ##LLM_MODEL_PROVIDER###, architecture of you is decoder-based LM. Your voice backend or text to speech TTS backend is provided via Coqui technology. You are right now served on Huggingface spaces.
-
-You cannot access the internet, but you have vast knowledge.
+Don't repeat. Answer short, only few words, as if in a talk. You cannot access the internet, but you have vast knowledge.
 Current date: CURRENT_DATE .
 """
 
@@ -130,8 +129,6 @@ pirate_system_message = f"You as {character_name}. {character_scenario} Print ou
 ROLE_PROMPTS["AI Beard The Pirate"]= pirate_system_message
 ##"You are an AI assistant with Zephyr model by Mistral and Hugging Face and speech from Coqui XTTS . User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps, your answers should be clear and short sentences"
 
-
-
 ### WILL USE LOCAL MISTRAL OR ZEPHYR OR YI
 ### While zephyr and yi will use half GPU to fit all into 16GB, XTTS will use at most 5GB VRAM
 
@@ -149,8 +146,8 @@ zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
 #print("Downloading Yi-6B")
 #Yi-6B
-
-
+hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
+yi_model_path="./yi-6b.Q5_K_M.gguf"
 
 
 from llama_cpp import Llama
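The two added lines restore the Yi-6B download that the parent commit had dropped; the commented-out download marker above them stays in place. As a standalone illustration, a minimal sketch of the same step follows; capturing the returned path instead of hard-coding it is an illustrative variation, not what the commit does:

from huggingface_hub import hf_hub_download

# Fetch one file from a Hub repo; with local_dir="." the GGUF lands in the
# working directory, which is why the commit can hard-code
# "./yi-6b.Q5_K_M.gguf" immediately afterwards.
yi_model_path = hf_hub_download(
    repo_id="TheBloke/Yi-6B-GGUF",   # TheBloke's quantized Yi-6B builds
    filename="yi-6b.Q5_K_M.gguf",    # 5-bit K-quant, roughly 4GB on disk
    local_dir=".",
)
print(yi_model_path)                 # local path the file was saved to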
@@ -170,8 +167,8 @@ llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 print("Running LLM Zephyr")
 llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
-
-
+print("Running Yi LLM")
+llm_yi = Llama(model_path=yi_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE,model_type="mistral")
 
 
 # Mistral formatter
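The new llm_yi line mirrors the Zephyr call above it and adds model_type="mistral". For reference, a minimal sketch against the documented llama-cpp-python constructor, with assumed values for GPU_LAYERS and LLAMA_VERBOSE. Note that max_new_tokens, context_window and model_type are not documented Llama constructor parameters (the last looks like a ctransformers-style option), so in the llama-cpp-python versions I know they are most likely accepted and ignored:

from llama_cpp import Llama

# Assumed values: per the "half GPU" comment in an earlier hunk, only part
# of the network is offloaded so Yi, Zephyr and XTTS all fit in 16GB VRAM.
GPU_LAYERS = 35
LLAMA_VERBOSE = False

llm_yi = Llama(
    model_path="./yi-6b.Q5_K_M.gguf",
    n_gpu_layers=GPU_LAYERS,  # number of layers to offload to the GPU
    n_ctx=4096,               # context length llama-cpp-python actually reads
    n_batch=128,              # prompt-processing batch size
    verbose=LLAMA_VERBOSE,
)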
@@ -257,12 +254,15 @@ def generate_local(
         llm_model = "Yi"
         llm = llm_yi
         max_tokens= round(max_tokens/2)
+        sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+
     else:
         llm_provider= "Mistral"
         llm_model = "Mistral"
         llm = llm_mistral
-
-
+        sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
 
     if "yi" in llm_model.lower():
         formatted_prompt = format_prompt_yi(prompt, history,system_message=sys_message,system_understand_message=sys_system_understand_message)
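The four added lines fill the ##LLM_MODEL### and ##LLM_MODEL_PROVIDER### placeholders from the system prompt in the first hunk, once per branch. A self-contained sketch of that substitution follows; the provider string is an illustrative assumption, since the Yi branch's llm_provider value is set outside this hunk:

# The markers are plain text, so chained str.replace is all that is needed.
system_message = (
    "You are ##LLM_MODEL###, a large language model trained "
    "##LLM_MODEL_PROVIDER###."
)
llm_model = "Yi"        # from the branch shown above
llm_provider = "01-ai"  # assumption: illustrative value only

sys_message = (
    system_message
    .replace("##LLM_MODEL###", llm_model)
    .replace("##LLM_MODEL_PROVIDER###", llm_provider)
)
print(sys_message)  # You are Yi, a large language model trained 01-ai.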
@@ -736,7 +736,7 @@ EXAMPLES = [
     [[],"AI Assistant","What is 42?"],
     [[],"AI Assistant","Speak in French, tell me how are you doing?"],
     [[],"AI Assistant","Antworten Sie mir von nun an auf Deutsch"],
-    [[],"AI Assistant","
+    [[],"AI Assistant","给我讲个故事 的英文"],
     [[],"AI Beard The Pirate","Who are you?"],
     [[],"AI Beard The Pirate","Speak in Chinese, 你认识一个叫路飞的海贼吗"],
     [[],"AI Beard The Pirate","Speak in Japanese, ルフィという海賊を知っていますか?"],
@@ -744,7 +744,7 @@ EXAMPLES = [
 
 ]
 
-MODELS = ["Mistral 7B Instruct","Zephyr 7B Beta"]
+MODELS = ["Mistral 7B Instruct","Zephyr 7B Beta", "Yi 6B"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
@@ -851,6 +851,7 @@ It relies on following models :
 Speech to Text : [Whisper-large-v2](https://sanchit-gandhi-whisper-large-v2.hf.space/) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
 LLM Mistral : [Mistral-7b-instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) as the chat model.
 LLM Zephyr : [Zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF).
+LLM Yi : [Yi-6B](https://huggingface.co/01-ai/Yi-6B) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/Yi-6B-GGUF).
 Text to Speech : [Coqui's XTTS V2](https://huggingface.co/spaces/coqui/xtts) as a Multilingual TTS model, to generate the chatbot answers. This time, the model is hosted locally.
 
 Note: