Spaces:

Mattral
/

ORG-Chat

Sleeping

App Files Files Community

Mattral committed on May 14, 2024

Commit

d429c0c

verified ·

1 Parent(s): 166e47c

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -7

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import random
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -13,19 +14,28 @@ system_prompt_text = "You are a smart and helpful co-worker of Thailand based mu
 with open("info.md", "r") as file:
     info_md_content = file.read()
 def format_prompt_mixtral(message, history, info_md_content):
     prompt = "<s>"
     if history:
         for user_prompt, bot_response in history:
             prompt += f"[INST] {user_prompt} [/INST]"
             prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {info_md_content}\n\n{message} [/INST]"
     return prompt
 def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
-    # Prepend the system prompt to the user prompt
-    full_prompt = f"{system_prompt_text}, {prompt}"
     generate_kwargs = dict(
         temperature=temp,
         max_new_tokens=tokens,
@@ -35,7 +45,7 @@ def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
         seed=seed,
     )
-    formatted_prompt = format_prompt_mixtral(full_prompt, history, info_md_content)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
@@ -76,8 +86,8 @@ with gr.Blocks(auth=("Admin", "0112358")) as app:  # Add auth here
                     seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
                     tokens = gr.Slider(label="Max new tokens", value=3840, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
                     temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-                    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum 1.0, value=0.9)
-                    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum 2.0, value=1.0)
     hid1 = gr.Number(value=1, visible=False)

 import gradio as gr
 from huggingface_hub import InferenceClient
 import random
+import textwrap
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 # Load the reference document once at import time; its text is injected
 # into every prompt sent to the model.
 with open("info.md", "r", encoding="utf-8") as file:
     info_md_content = file.read()
 # Chunk the info.md content into smaller sections.
 # textwrap.wrap() by default replaces every whitespace character (newlines
 # and tabs included) with a single space and drops whitespace at chunk
 # boundaries, which would flatten the markdown's headings, lists and
 # paragraphs into one run of text. Those rewrites are disabled so each
 # chunk is a verbatim slice of the file, still split at whitespace.
 chunk_size = 2500  # Maximum characters per chunk; adjust as needed
 info_md_chunks = textwrap.wrap(
     info_md_content,
     chunk_size,
     expand_tabs=False,
     replace_whitespace=False,
     drop_whitespace=False,
 )
def get_relevant_chunk(prompt, chunks):
    """Return the piece of reference text to prepend to the model prompt.

    Args:
        prompt: The user's message. Currently unused; it is accepted so a
            relevance-based selection can be added later without changing
            callers.
        chunks: A sequence of text chunks, or a single string holding the
            whole reference text.

    Returns:
        The first chunk, the string itself when ``chunks`` is a plain
        string, or ``""`` when there is nothing to return.
    """
    # A bare string is treated as one chunk; indexing it would hand back
    # only its first character.
    if isinstance(chunks, str):
        return chunks
    # textwrap.wrap() yields [] for empty input, so guard the index.
    if not chunks:
        return ""
    # For simplicity, we just use the first chunk. You can improve this by
    # adding more sophisticated logic.
    return chunks[0]
 def format_prompt_mixtral(message, history, info_md_content):
     """Build the prompt string sent to the Mixtral instruct model.

     The prompt is laid out as: ``<s>``, the selected info.md chunk, the
     module-level ``system_prompt_text``, every past exchange wrapped as
     ``[INST] user [/INST] bot</s> ``, and finally the new message wrapped
     in ``[INST] ... [/INST]``.

     Args:
         message: The new user message.
         history: Iterable of ``(user_prompt, bot_response)`` pairs; may be
             empty or ``None``.
         info_md_content: The reference text. Despite the name, the caller
             in this file passes the list of chunks; a plain string is also
             accepted and treated as a single chunk.

     Returns:
         The fully assembled prompt string.
     """
     # Normalise a raw string to a one-element chunk list so chunk selection
     # never indexes into the string and returns a single character.
     if isinstance(info_md_content, str):
         chunks = [info_md_content]
     else:
         chunks = info_md_content
     relevant_chunk = get_relevant_chunk(message, chunks)
     # NOTE(review): the chunk and system prompt sit outside any [INST]
     # block; confirm this matches the intended Mixtral chat template.
     parts = [
         "<s>",
         f"{relevant_chunk}\n\n",  # Relevant chunk of info.md goes first
         f"{system_prompt_text}\n\n",  # Then the system prompt
     ]
     for user_prompt, bot_response in history or ():
         parts.append(f"[INST] {user_prompt} [/INST]")
         parts.append(f" {bot_response}</s> ")
     parts.append(f"[INST] {message} [/INST]")
     return "".join(parts)
 def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
     generate_kwargs = dict(
         temperature=temp,
         max_new_tokens=tokens,
         seed=seed,
     )
+    formatted_prompt = format_prompt_mixtral(prompt, history, info_md_chunks)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
                     seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
                     tokens = gr.Slider(label="Max new tokens", value=3840, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
                     temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)
     hid1 = gr.Number(value=1, visible=False)