Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,32 +1,71 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
| 3 |
|
|
|
|
| 4 |
llm = Llama.from_pretrained(
|
| 5 |
repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
|
| 6 |
filename="Llama-3.2-1B.Q4_K_M.gguf",
|
| 7 |
n_ctx=2048,
|
| 8 |
n_threads=2,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
for turn in history:
|
| 16 |
-
role = turn[
|
| 17 |
-
content = turn[
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def respond(
|
| 28 |
message,
|
| 29 |
-
history: list[dict
|
| 30 |
system_message_dummy,
|
| 31 |
max_tokens,
|
| 32 |
temperature,
|
|
@@ -34,55 +73,41 @@ def respond(
|
|
| 34 |
repetition_penalty,
|
| 35 |
style_mode,
|
| 36 |
):
|
| 37 |
-
|
| 38 |
-
# Translated instruction
|
| 39 |
-
base_instruction = (
|
| 40 |
-
"You are a ChatBot that answers questions in different styles and can hold conversations. "
|
| 41 |
-
"Please always answer in the following style: "
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
context = ""
|
| 45 |
-
# Logic keys updated to match the English Dropdown choices below
|
| 46 |
-
if style_mode == "Professional":
|
| 47 |
-
context = "Formulate the answer extremely politely and professionally (Business English)."
|
| 48 |
-
elif style_mode == "Shakespeare":
|
| 49 |
-
context = "Formulate the answer in old-fashioned, poetic English."
|
| 50 |
-
elif style_mode == "Funny/Ironic":
|
| 51 |
-
context = "Formulate the answer in a funny and ironic way. Include jokes."
|
| 52 |
-
else:
|
| 53 |
-
context = "Answer normally."
|
| 54 |
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
-
prompt
|
|
|
|
| 58 |
|
|
|
|
| 59 |
output = llm(
|
| 60 |
prompt,
|
| 61 |
max_tokens=int(max_tokens),
|
| 62 |
temperature=float(temperature),
|
| 63 |
top_p=float(top_p),
|
| 64 |
repeat_penalty=float(repetition_penalty),
|
| 65 |
-
stop=["
|
| 66 |
echo=False
|
| 67 |
)
|
| 68 |
|
| 69 |
reply = output["choices"][0]["text"].strip()
|
| 70 |
return reply
|
| 71 |
|
| 72 |
-
|
| 73 |
-
# --- 4. GUI SETUP ---
|
| 74 |
chatbot = gr.ChatInterface(
|
| 75 |
respond,
|
| 76 |
type="messages",
|
| 77 |
additional_inputs=[
|
| 78 |
gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
|
| 79 |
|
| 80 |
-
gr.Slider(minimum=1, maximum=
|
| 81 |
-
gr.Slider(minimum=0.1, maximum=
|
| 82 |
-
gr.Slider(minimum=0.1, maximum=1.0, value=0.
|
| 83 |
-
|
|
|
|
| 84 |
|
| 85 |
-
# Translated Dropdown Options
|
| 86 |
gr.Dropdown(
|
| 87 |
choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
|
| 88 |
value="Normal",
|
|
@@ -92,8 +117,7 @@ chatbot = gr.ChatInterface(
|
|
| 92 |
)
|
| 93 |
|
| 94 |
with gr.Blocks() as demo:
|
| 95 |
-
#
|
| 96 |
-
gr.Markdown("# Advanced Chat Bot")
|
| 97 |
with gr.Sidebar():
|
| 98 |
gr.LoginButton()
|
| 99 |
chatbot.render()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
| 3 |
|
| 4 |
# --- 0. MODEL INITIALIZATION ---
# Pull the quantized GGUF weights from the Hugging Face Hub and load them
# with llama-cpp-python. Runs once at import time (network + disk I/O).
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"

llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,     # context window size in tokens
    n_threads=2,    # CPU threads used for inference
    verbose=False,  # silence llama.cpp load/inference logging
)
|
| 12 |
|
| 13 |
# --- 1. LLAMA 3 SPECIFIC FORMATTING ---
def format_llama3_prompt(system_message: str, history: list[dict], user_message: str) -> str:
    """Format a conversation using the official Llama 3 chat template.

    Args:
        system_message: Persona/instruction text for the system slot.
        history: Prior turns as dicts with 'role' and 'content' keys
            (Gradio "messages" format).
        user_message: The current user input.

    Returns:
        A single prompt string wrapped in Llama 3 special tokens, ending
        with an OPEN assistant header (no trailing <|eot_id|>) so the model
        continues by generating the assistant's reply.
    """
    # Build the parts in a list and join once (avoids repeated string
    # concatenation; also drops the original's f-prefix on the final
    # placeholder-free literal, which was a no-op f-string).
    parts = ["<|begin_of_text|>"]

    # System message first.
    parts.append(f"<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>")

    # Then every prior turn, in order.
    for turn in history:
        parts.append(
            f"<|start_header_id|>{turn['role']}<|end_header_id|>\n\n{turn['content']}<|eot_id|>"
        )

    # Current user message, then the open assistant header for generation.
    parts.append(f"<|start_header_id|>user<|end_header_id|>\n\n{user_message}<|eot_id|>")
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")

    return "".join(parts)
|
| 36 |
|
| 37 |
# --- 2. ENHANCED SYSTEM PROMPTS ---
def get_system_prompt(style_mode):
    """Return a rich persona definition for the selected answer style.

    Unknown/unrecognized styles fall back to the "Normal" persona.
    """
    base_instruction = "You are a helpful and intelligent AI assistant."

    # Style-specific persona text; the shared base instruction is prepended
    # to whichever entry is selected.
    personas = {
        "Normal": "Answer the user's questions clearly and concisely.",
        "Professional": (
            "You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented. "
            "Use formal vocabulary, avoid slang, and structure your answers with bullet points where possible."
        ),
        "Shakespeare": (
            "You are William Shakespeare. "
            "You speak only in Early Modern English (using thee, thou, hath, etc.). "
            "Your responses should be poetic, dramatic, and perhaps slightly archaic."
        ),
        "Funny/Ironic": (
            "You are a sarcastic comedian who loves irony. "
            "While you must still answer the user's question, wrap the answer in dry humor, "
            "witty remarks, and self-deprecating jokes. Do not be overly polite."
        ),
    }

    suffix = personas.get(style_mode, personas["Normal"])
    return f"{base_instruction} {suffix}"
|
| 65 |
|
| 66 |
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Chat callback for gr.ChatInterface: build a Llama 3 prompt and generate a reply.

    `system_message_dummy` is the hidden system-prompt textbox from the UI and
    is intentionally ignored; the effective system prompt is derived from
    `style_mode`. Slider values arrive as numbers-or-strings, hence the
    explicit int()/float() coercions below.
    """
    # Resolve the persona for the chosen answer style.
    system_prompt = get_system_prompt(style_mode)

    # Keep only the last 10 messages (5 user/assistant pairs) so the prompt
    # stays well inside the 2048-token context window.
    # NOTE(review): this trims by message count, not tokens — very long
    # messages can still overflow the context; confirm acceptable.
    trimmed_history = history[-10:] if len(history) > 10 else history

    # Build the prompt with the Llama 3 chat template and generate.
    prompt = format_llama3_prompt(system_prompt, trimmed_history, message)
    output = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=["<|eot_id|>", "<|end_of_text|>"],  # stop at Llama 3 end-of-turn tokens
        echo=False,
    )

    return output["choices"][0]["text"].strip()
|
| 97 |
|
| 98 |
+
# --- 3. GUI SETUP ---
|
|
|
|
| 99 |
chatbot = gr.ChatInterface(
|
| 100 |
respond,
|
| 101 |
type="messages",
|
| 102 |
additional_inputs=[
|
| 103 |
gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
|
| 104 |
|
| 105 |
+
gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
|
| 106 |
+
gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
|
| 107 |
+
gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
|
| 108 |
+
|
| 109 |
+
gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
|
| 110 |
|
|
|
|
| 111 |
gr.Dropdown(
|
| 112 |
choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
|
| 113 |
value="Normal",
|
|
|
|
| 117 |
)
|
| 118 |
|
| 119 |
with gr.Blocks() as demo:
    # Page title, then the chat interface with a login button in the sidebar.
    # NOTE(review): no demo.launch() is visible in this chunk — confirm the
    # hosting runtime (e.g. HF Spaces) launches `demo` automatically.
    gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()
|