Spaces:

coqui
/

voice-chat-with-mistral

Paused

gorkemgoknar committed on Nov 6, 2023

Commit

331538a

1 Parent(s): a51d57b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -158,11 +158,11 @@ from llama_cpp import Llama
 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
 # else 35 full layers + XTTS works fine on T4 16GB
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
-GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 30))
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
-llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 print("Running LLM Zephyr")
 llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
@@ -191,19 +191,18 @@ def format_prompt_mistral(message, history, system_message=system_message,system
 # Zephyr formatter
 def format_prompt_zephyr(message, history, system_message=system_message):
     prompt = (
-        "<|system|>\n" + system_message  +  "\n</s>"
     )
     for user_prompt, bot_response in history:
-        prompt += f"<|user|>\n{user_prompt} </s>"
         prompt += f"<|assistant|>\n{bot_response}</s>"
     if message=="":
         message="Hello"
     prompt += f"<|user|>\n{message}</s>"
-    prompt += f"<|assistant|>\n"
     print(prompt)
     return prompt
 def generate_local(
     prompt,
     history,

 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
 # else 35 full layers + XTTS works fine on T4 16GB
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
+GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 25))
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
+llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS+10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 print("Running LLM Zephyr")
 llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 # Zephyr formatter
 def format_prompt_zephyr(message, history, system_message=system_message):
     prompt = (
+        "<|system|>\n" + system_message  + "</s>"
     )
     for user_prompt, bot_response in history:
+        prompt += f"<|user|>\n{user_prompt}</s>"
         prompt += f"<|assistant|>\n{bot_response}</s>"
     if message=="":
         message="Hello"
     prompt += f"<|user|>\n{message}</s>"
+    prompt += f"<|assistant|>"
     print(prompt)
     return prompt
 def generate_local(
     prompt,
     history,