Commit 28f7799 · add Yi-6B-200K
Parent(s): 87be2eb

app.py CHANGED
@@ -106,7 +106,7 @@ system_message = system_message.replace("CURRENT_DATE", str(datetime.date.today(
 
 # MISTRAL ONLY
 default_system_understand_message = (
-    "I understand, I am a
+    "I understand, I am a ##LLM_MODEL### chatbot with speech by Coqui team."
 )
 system_understand_message = os.environ.get(
     "SYSTEM_UNDERSTAND_MESSAGE", default_system_understand_message
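This change replaces a hardcoded model name with a ##LLM_MODEL### placeholder that the generate_local hunk further down fills in per selected model. A minimal sketch of that substitution, using the placeholder token from this diff and one of the values generate_local assigns below:

# Sketch of the placeholder substitution introduced by this commit.
# The token ##LLM_MODEL### is from the diff; "Yi" is one of the values
# the new generate_local branch assigns.
default_system_understand_message = (
    "I understand, I am a ##LLM_MODEL### chatbot with speech by Coqui team."
)
msg = default_system_understand_message.replace("##LLM_MODEL###", "Yi")
print(msg)  # I understand, I am a Yi chatbot with speech by Coqui team.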
@@ -132,24 +132,26 @@ ROLE_PROMPTS["AI Beard The Pirate"]= pirate_system_message
 
 
 
-### WILL USE LOCAL MISTRAL OR ZEPHYR
+### WILL USE LOCAL MISTRAL OR ZEPHYR OR YI
+### While zephyr and yi will use half GPU to fit all into 16GB, XTTS will use at most 5GB VRAM
 
 from huggingface_hub import hf_hub_download
 print("Downloading LLM")
-
-
-print("Downloading Zephyr")
+print("Downloading Zephyr 7B beta")
 #Zephyr
 hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
-# use new gguf format
 zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
-print("Downloading Mistral")
+print("Downloading Mistral 7B Instruct")
 #Mistral
 hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
-# use new gguf format
 mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 
+print("Downloading Yi-6B-200k")
+#Yi-6B-200K
+hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b-200k.Q5_K_M.gguf")
+yi_model_path="./yi-6b-200k.Q5_K_M.gguf"
+
 
 from llama_cpp import Llama
 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
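Two asides on the download block. First, the Yi hunk pulls filename yi-6b-200k.Q5_K_M.gguf from repo TheBloke/Yi-6B-GGUF, while the model description later in this diff links TheBloke/Yi-6B-200K-GGUF; the 200K filename normally lives in the latter repo, so one of the two repo ids looks unintended. Second, hf_hub_download returns the local path of the fetched file, so the hardcoded *_model_path assignments could capture the return value instead; a sketch, not what the commit does:

from huggingface_hub import hf_hub_download

# hf_hub_download returns the path of the downloaded file, so the model
# path need not be hardcoded. Repo id and filename are copied from the diff.
yi_model_path = hf_hub_download(
    repo_id="TheBloke/Yi-6B-GGUF",
    local_dir=".",
    filename="yi-6b-200k.Q5_K_M.gguf",
)
print(yi_model_path)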
@@ -164,7 +166,10 @@ print("Running LLM Mistral")
 llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 print("Running LLM Zephyr")
-llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-
+llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-5,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+
+print("Running Yi LLM")
+llm_zephyr = Llama(model_path=yi_model_path,n_gpu_layers=GPU_LAYERS-5,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 
 # Mistral formatter
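Note that both additions in this hunk assign to llm_zephyr, so the second Llama() call loads the Yi weights over the Zephyr handle and the Zephyr model becomes unreachable. A sketch with a separate handle (the name llm_yi is hypothetical, not in the commit; the keyword arguments, including GPU_LAYERS-5, and the path variables are copied from the diff):

from llama_cpp import Llama

# Hypothetical variant: one handle per model instead of rebinding llm_zephyr.
llm_zephyr = Llama(model_path=zephyr_model_path, n_gpu_layers=GPU_LAYERS-5,
                   max_new_tokens=256, context_window=4096, n_ctx=4096,
                   n_batch=128, verbose=LLAMA_VERBOSE)
llm_yi = Llama(model_path=yi_model_path, n_gpu_layers=GPU_LAYERS-5,
               max_new_tokens=256, context_window=4096, n_ctx=4096,
               n_batch=128, verbose=LLAMA_VERBOSE)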
@@ -230,8 +235,15 @@ def generate_local(
         formatted_prompt = format_prompt_zephyr(prompt, history,system_message=sys_message)
         llm = llm_zephyr
     else:
-
-
+        if "yi" in llm_model.lower():
+            llm_provider= "01.ai"
+            llm_model = "Yi"
+        else:
+            llm_provider= "Mistral"
+            llm_model = "Mistral"
+        sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message=sys_system_understand_message)
         llm = llm_mistral
 
 
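Worth spelling out: the new else-branch only relabels the system messages. When "yi" appears in the model name, llm_provider/llm_model are set to 01.ai/Yi, but the prompt is still built with format_prompt_mistral and the handle is still llm_mistral, so both the Yi and Mistral choices run the Mistral weights. A condensed restatement of the branch (the wrapper function is illustrative; system_message and system_understand_message come from earlier in the file):

def resolve_system_messages(llm_model: str):
    # Condensed restatement of the else-branch added in this hunk.
    if "yi" in llm_model.lower():
        llm_provider, llm_model = "01.ai", "Yi"
    else:
        llm_provider, llm_model = "Mistral", "Mistral"
    sys_message = (system_message
                   .replace("##LLM_MODEL###", llm_model)
                   .replace("##LLM_MODEL_PROVIDER###", llm_provider))
    sys_understand = (system_understand_message
                      .replace("##LLM_MODEL###", llm_model)
                      .replace("##LLM_MODEL_PROVIDER###", llm_provider))
    return sys_message, sys_understand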
@@ -680,9 +692,11 @@ EXAMPLES = [
     [[],"AI Assistant","Speak in French, tell me how are you doing?"],
     [[],"AI Assistant","Antworten Sie mir von nun an auf Deutsch"],
     [[],"AI Beard The Pirate","Who are you?"],
+    [[],"AI Beard The Pirate","告诉我你的冒险经历"],
+
 ]
 
-MODELS = ["Mistral","Zephyr"]
+MODELS = ["Mistral 7B Instruct","Zephyr 7B Beta","Yi 6B"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
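For the non-English examples above: "Antworten Sie mir von nun an auf Deutsch" is German for "From now on, answer me in German", and the newly added "告诉我你的冒险经历" is Chinese for "Tell me about your adventures", presumably added to exercise Yi's English/Chinese bilingual training.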
@@ -699,7 +713,7 @@ with gr.Blocks(title=title) as demo:
     with gr.Row():
         model_selected = gr.Dropdown(
             label="Select Instuct LLM Model to Use",
-            info="Zephyr
+            info="Mistral, Zephyr, Yi : 5-bit GGUF models are preloaded",
             choices=MODELS,
             max_choices=1,
             value=MODELS[0],
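The dropdown's selected string appears to flow into generate_local's llm_model argument, so the display names in MODELS must keep the substrings ("zephyr", "yi") that the dispatch above matches on. A standalone sketch of the widget as configured in this diff:

import gradio as gr

MODELS = ["Mistral 7B Instruct", "Zephyr 7B Beta", "Yi 6B"]

with gr.Blocks() as demo:
    # Same Dropdown configuration as the diff; the selection string is later
    # matched on the "zephyr"/"yi" substrings in generate_local.
    model_selected = gr.Dropdown(
        label="Select Instruct LLM Model to Use",  # the diff spells this "Instuct"
        info="Mistral, Zephyr, Yi : 5-bit GGUF models are preloaded",
        choices=MODELS,
        max_choices=1,
        value=MODELS[0],
    )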
@@ -789,7 +803,8 @@ It relies on following models :
 Speech to Text : [Whisper-large-v2](https://sanchit-gandhi-whisper-large-v2.hf.space/) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
 LLM Mistral : [Mistral-7b-instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) as the chat model, GGUF Q5_K_M quantized version used locally via llama_cpp[huggingface_hub](TheBloke/Mistral-7B-Instruct-v0.1-GGUF).
 LLM Zephyr : [Zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF).
-
+LLM Yi : [Yi-6B-200K](https://huggingface.co/01-ai/Yi-6B-200K) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/Yi-6B-200K-GGUF).
+Text to Speech : [Coqui's XTTS V2](https://huggingface.co/spaces/coqui/xtts) as a Multilingual TTS model, to generate the chatbot answers. This time, the model is hosted locally.
 
 Note:
 - By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml