Commit
·
374ef04
1
Parent(s):
98ee681
use embedded Mistral
Browse files
app.py
CHANGED
|
@@ -139,13 +139,14 @@ print("Downloading Zephyr 7B beta")
|
|
| 139 |
hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
|
| 140 |
zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
|
| 141 |
|
| 142 |
-
|
| 143 |
#Mistral
|
| 144 |
-
|
| 145 |
-
|
| 146 |
|
| 147 |
#print("Downloading Yi-6B")
|
| 148 |
#Yi-6B
|
|
|
|
| 149 |
#hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
|
| 150 |
#yi_model_path="./yi-6b.Q5_K_M.gguf"
|
| 151 |
|
|
@@ -159,9 +160,10 @@ GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
|
|
| 159 |
LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
|
| 160 |
|
| 161 |
LLAMA_VERBOSE=False
|
| 162 |
-
print("Running
|
| 163 |
-
|
| 164 |
-
|
|
|
|
| 165 |
|
| 166 |
|
| 167 |
print("Running LLM Zephyr")
|
|
@@ -254,15 +256,12 @@ def generate_local(
|
|
| 254 |
llm_model = "Yi"
|
| 255 |
llm = llm_yi
|
| 256 |
max_tokens= round(max_tokens/2)
|
| 257 |
-
sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
|
| 258 |
-
sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
|
| 259 |
-
|
| 260 |
else:
|
| 261 |
llm_provider= "Mistral"
|
| 262 |
llm_model = "Mistral"
|
| 263 |
llm = llm_mistral
|
| 264 |
-
|
| 265 |
-
|
| 266 |
|
| 267 |
if "yi" in llm_model.lower():
|
| 268 |
formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message="")
|
|
@@ -271,8 +270,8 @@ def generate_local(
|
|
| 271 |
|
| 272 |
try:
|
| 273 |
print("LLM Input:", formatted_prompt)
|
| 274 |
-
if llm_model=="
|
| 275 |
-
#
|
| 276 |
generate_kwargs = dict(
|
| 277 |
temperature=temperature,
|
| 278 |
max_new_tokens=max_tokens,
|
|
@@ -744,7 +743,7 @@ EXAMPLES = [
|
|
| 744 |
|
| 745 |
]
|
| 746 |
|
| 747 |
-
MODELS = ["
|
| 748 |
|
| 749 |
OTHER_HTML=f"""<div>
|
| 750 |
<a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
|
|
|
|
| 139 |
hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
|
| 140 |
zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
|
| 141 |
|
| 142 |
+
print("Downloading Mistral 7B Instruct")
|
| 143 |
#Mistral
|
| 144 |
+
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
|
| 145 |
+
mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
|
| 146 |
|
| 147 |
#print("Downloading Yi-6B")
|
| 148 |
#Yi-6B
|
| 149 |
+
# Note: the current Yi is a text-generation model, not an instruction-tuned model
|
| 150 |
#hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
|
| 151 |
#yi_model_path="./yi-6b.Q5_K_M.gguf"
|
| 152 |
|
|
|
|
| 160 |
LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
|
| 161 |
|
| 162 |
LLAMA_VERBOSE=False
|
| 163 |
+
print("Running Mistral")
|
| 164 |
+
llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
| 165 |
+
#print("Running LLM Mistral as InferenceClient")
|
| 166 |
+
#llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
|
| 167 |
|
| 168 |
|
| 169 |
print("Running LLM Zephyr")
|
|
|
|
| 256 |
llm_model = "Yi"
|
| 257 |
llm = llm_yi
|
| 258 |
max_tokens= round(max_tokens/2)
|
|
|
|
|
|
|
|
|
|
| 259 |
else:
|
| 260 |
llm_provider= "Mistral"
|
| 261 |
llm_model = "Mistral"
|
| 262 |
llm = llm_mistral
|
| 263 |
+
sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
|
| 264 |
+
sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
|
| 265 |
|
| 266 |
if "yi" in llm_model.lower():
|
| 267 |
formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message="")
|
|
|
|
| 270 |
|
| 271 |
try:
|
| 272 |
print("LLM Input:", formatted_prompt)
|
| 273 |
+
if llm_model=="OTHER":
|
| 274 |
+
# The Mistral endpoint has too many queued requests, which causes long wait times.
|
| 275 |
generate_kwargs = dict(
|
| 276 |
temperature=temperature,
|
| 277 |
max_new_tokens=max_tokens,
|
|
|
|
| 743 |
|
| 744 |
]
|
| 745 |
|
| 746 |
+
MODELS = ["Zephyr 7B Beta","Mistral 7B Instruct"]
|
| 747 |
|
| 748 |
OTHER_HTML=f"""<div>
|
| 749 |
<a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
|