Commit 5a7a07c
Parent(s): ae3c32a
Update app.py
app.py CHANGED
@@ -160,6 +160,8 @@ from llama_cpp import Llama
 # 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
 GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))
 
+LLM_STOP_WORDS= ["</s>","<|user|>","/s>"]
+
 LLAMA_VERBOSE=False
 print("Running LLM Mistral")
 llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
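
The new LLM_STOP_WORDS list is wired into the generation call further down as llama-cpp-python's stop parameter, which ends decoding as soon as any listed string is produced. A minimal sketch of that behavior, assuming a placeholder model path:

    # Sketch: how a stop list truncates llama-cpp-python output.
    # "model.gguf" is a placeholder path, not part of this commit.
    from llama_cpp import Llama

    LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]

    llm = Llama(model_path="model.gguf", n_gpu_layers=35, n_ctx=4096)
    out = llm(
        "[INST] Say hello, then stop. [/INST]",
        max_tokens=64,
        stop=LLM_STOP_WORDS,  # decoding halts on the first stop string
    )
    print(out["choices"][0]["text"])  # returned text excludes the stop string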
@@ -176,8 +178,9 @@ def format_prompt_mistral(message, history, system_message=system_message,system
     for user_prompt, bot_response in history:
         prompt += f"[INST] {user_prompt} [/INST]"
         prompt += f" {bot_response}</s> "
-
-
+
+    #if message=="":
+    #    message="Hello"
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
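
For context, format_prompt_mistral assembles Mistral's [INST] ... [/INST] chat format from the (user, bot) history pairs; the commented-out lines would have substituted a default greeting for an empty message. A simplified standalone sketch (the real function also seeds the prompt with a system message):

    # Sketch of the [INST]-style prompt assembly changed above.
    def format_prompt_mistral(message, history):
        prompt = ""  # the real function starts from a system prompt
        for user_prompt, bot_response in history:
            prompt += f"[INST] {user_prompt} [/INST]"
            prompt += f" {bot_response}</s> "
        prompt += f"[INST] {message} [/INST]"
        return prompt

    print(format_prompt_mistral("How are you?", [("Hi", "Hello!")]))
    # [INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]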
@@ -211,7 +214,7 @@ def generate_local(
     temperature=0.8,
     max_tokens=256,
     top_p=0.95,
-    stop =
+    stop = LLM_STOP_WORDS
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
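
The stop list now rides along with the other sampling settings; presumably generate_local bundles them into the generate_kwargs dict that is splatted into the llm(...) call in the next hunk. A hedged sketch of that plumbing (build_generate_kwargs is a hypothetical helper, and the body of the temperature clamp is not visible in this diff):

    # Hypothetical helper mirroring generate_local's visible parameters.
    LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]

    def build_generate_kwargs(temperature=0.8, max_tokens=256, top_p=0.95,
                              stop=LLM_STOP_WORDS):
        temperature = float(temperature)
        if temperature < 1e-2:
            temperature = 1e-2  # assumed clamp; the diff only shows the check
        return dict(temperature=temperature, max_tokens=max_tokens,
                    top_p=top_p, stop=stop, stream=True)

    print(build_generate_kwargs()["stop"])  # ['</s>', '<|user|>', '/s>']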
@@ -236,6 +239,7 @@ def generate_local(
 
 
     try:
+        print("LLM Input:", formatted_prompt)
         stream = llm(
             formatted_prompt,
             **generate_kwargs,
@@ -254,7 +258,7 @@ def generate_local(
             return
 
 
-        output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","")
+        output += response["choices"][0]["text"].replace("<|assistant|>","").replace("<|user|>","")
         yield output
 
     except Exception as e:
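
The surrounding loop accumulates each streamed chunk while scrubbing role tags that the model occasionally echoes. A self-contained imitation with a faked stream:

    # Sketch: accumulate streamed chunks and strip echoed role tags,
    # mirroring the output += ... line above. The stream is faked here.
    fake_stream = [
        {"choices": [{"text": "Hello"}]},
        {"choices": [{"text": " there<|assistant|>"}]},
    ]

    output = ""
    for response in fake_stream:
        output += response["choices"][0]["text"].replace("<|assistant|>", "").replace("<|user|>", "")
        print(output)
    # Hello
    # Hello there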
@@ -464,7 +468,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
         history[-1][1] = character.replace("<|assistant|>","")
         # It is coming word by word
 
-        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").strip())
+        text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
         if len(text_to_generate) > 1:
 
             dif = len(text_to_generate) - len(sentence_list)
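
The widened replace chain strips more malformed assistant tags (<|ass>, [/ASST], [/ASSI], [/ASS]) before nltk.sent_tokenize splits the partial reply; once the tokenizer reports more sentences than have been queued, the earlier ones are complete and can go to TTS. A small sketch of that detection, assuming nltk's punkt data is installed:

    # Sketch: spotting a newly completed sentence in a growing reply,
    # as the dif = len(text_to_generate) - len(sentence_list) check does.
    import nltk
    nltk.download("punkt", quiet=True)  # sentence tokenizer model

    sentence_list = []                      # sentences already sent to TTS
    partial = "Hello there. How are<|ass>"  # reply so far, mid-sentence

    cleaned = partial.replace("\n", " ").replace("<|ass>", "").strip()
    text_to_generate = nltk.sent_tokenize(cleaned)
    if len(text_to_generate) > 1:
        # everything but the still-growing last sentence is finished
        sentence_list.extend(text_to_generate[:-1])
    print(sentence_list)  # ['Hello there.']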
@@ -509,7 +513,7 @@ def get_sentence(history, chatbot_role,llm_model,system_prompt=""):
 
     # return that final sentence token
     try:
-        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").strip())[-1]
+        last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
         sentence_hash = hash(last_sentence)
         if sentence_hash not in sentence_hash_list:
             if stored_sentence is not None and stored_sentence_hash is not None:
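
Hashing the final sentence and checking it against sentence_hash_list keeps the pipeline from re-speaking a sentence that was already flushed while streaming. A toy illustration of the dedup check (the stored_sentence bookkeeping is omitted):

    # Sketch: hash-based dedup of the final sentence, echoing the
    # sentence_hash_list membership test above.
    sentence_hash_list = [hash("Hello there.")]  # already spoken

    last_sentence = "How are you today?"
    sentence_hash = hash(last_sentence)
    if sentence_hash not in sentence_hash_list:
        sentence_hash_list.append(sentence_hash)
        print("speak:", last_sentence)  # only unseen sentences reach TTS
    else:
        print("skip duplicate")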