Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,32 +1,71 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
| 3 |
|
|
|
|
| 4 |
llm = Llama.from_pretrained(
|
| 5 |
repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
|
| 6 |
filename="Llama-3.2-1B.Q4_K_M.gguf",
|
| 7 |
n_ctx=2048,
|
| 8 |
n_threads=2,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
for turn in history:
|
| 16 |
-
role = turn[
|
| 17 |
-
content = turn[
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def respond(
|
| 28 |
message,
|
| 29 |
-
history: list[dict
|
| 30 |
system_message_dummy,
|
| 31 |
max_tokens,
|
| 32 |
temperature,
|
|
@@ -34,55 +73,41 @@ def respond(
|
|
| 34 |
repetition_penalty,
|
| 35 |
style_mode,
|
| 36 |
):
|
| 37 |
-
|
| 38 |
-
# Translated instruction
|
| 39 |
-
base_instruction = (
|
| 40 |
-
"You are a ChatBot that answers questions in different styles and can hold conversations. "
|
| 41 |
-
"Please always answer in the following style: "
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
context = ""
|
| 45 |
-
# Logic keys updated to match the English Dropdown choices below
|
| 46 |
-
if style_mode == "Professional":
|
| 47 |
-
context = "Formulate the answer extremely politely and professionally (Business English)."
|
| 48 |
-
elif style_mode == "Shakespeare":
|
| 49 |
-
context = "Formulate the answer in old-fashioned, poetic English."
|
| 50 |
-
elif style_mode == "Funny/Ironic":
|
| 51 |
-
context = "Formulate the answer in a funny and ironic way. Include jokes."
|
| 52 |
-
else:
|
| 53 |
-
context = "Answer normally."
|
| 54 |
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
-
prompt
|
|
|
|
| 58 |
|
|
|
|
| 59 |
output = llm(
|
| 60 |
prompt,
|
| 61 |
max_tokens=int(max_tokens),
|
| 62 |
temperature=float(temperature),
|
| 63 |
top_p=float(top_p),
|
| 64 |
repeat_penalty=float(repetition_penalty),
|
| 65 |
-
stop=["
|
| 66 |
echo=False
|
| 67 |
)
|
| 68 |
|
| 69 |
reply = output["choices"][0]["text"].strip()
|
| 70 |
return reply
|
| 71 |
|
| 72 |
-
|
| 73 |
-
# --- 4. GUI SETUP ---
|
| 74 |
chatbot = gr.ChatInterface(
|
| 75 |
respond,
|
| 76 |
type="messages",
|
| 77 |
additional_inputs=[
|
| 78 |
gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
|
| 79 |
|
| 80 |
-
gr.Slider(minimum=1, maximum=
|
| 81 |
-
gr.Slider(minimum=0.1, maximum=
|
| 82 |
-
gr.Slider(minimum=0.1, maximum=1.0, value=0.
|
| 83 |
-
|
|
|
|
| 84 |
|
| 85 |
-
# Translated Dropdown Options
|
| 86 |
gr.Dropdown(
|
| 87 |
choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
|
| 88 |
value="Normal",
|
|
@@ -92,8 +117,7 @@ chatbot = gr.ChatInterface(
|
|
| 92 |
)
|
| 93 |
|
| 94 |
with gr.Blocks() as demo:
|
| 95 |
-
#
|
| 96 |
-
gr.Markdown("# Advanced Chat Bot")
|
| 97 |
with gr.Sidebar():
|
| 98 |
gr.LoginButton()
|
| 99 |
chatbot.render()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
| 3 |
|
| 4 |
# --- 0. MODEL INITIALIZATION ---
# Pull the quantized GGUF weights from the Hugging Face Hub and load them
# with llama-cpp-python. Runs once at import time (network + disk I/O).
MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"

llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=2048,     # context window size in tokens
    n_threads=2,    # CPU threads used for inference
    verbose=False,  # silence llama.cpp load/inference logging
)
|
| 12 |
|
| 13 |
# --- 1. LLAMA 3 SPECIFIC FORMATTING ---
def format_llama3_prompt(system_message: str, history: list[dict], user_message: str) -> str:
    """Format a conversation using the official Llama 3 chat template.

    Args:
        system_message: Persona/instruction text for the system slot.
        history: Prior turns as dicts with 'role' and 'content' keys
            (Gradio "messages" format).
        user_message: The current user input.

    Returns:
        A single prompt string wrapped in Llama 3 special tokens, ending
        with an OPEN assistant header (no trailing <|eot_id|>) so the model
        continues by generating the assistant's reply.
    """
    # Build the parts in a list and join once (avoids repeated string
    # concatenation; also drops the original's f-prefix on the final
    # placeholder-free literal, which was a no-op f-string).
    parts = ["<|begin_of_text|>"]

    # System message first.
    parts.append(f"<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>")

    # Then every prior turn, in order.
    for turn in history:
        parts.append(
            f"<|start_header_id|>{turn['role']}<|end_header_id|>\n\n{turn['content']}<|eot_id|>"
        )

    # Current user message, then the open assistant header for generation.
    parts.append(f"<|start_header_id|>user<|end_header_id|>\n\n{user_message}<|eot_id|>")
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")

    return "".join(parts)
|
| 36 |
|
| 37 |
# --- 2. ENHANCED SYSTEM PROMPTS ---
def get_system_prompt(style_mode):
    """Return a rich persona definition for the selected answer style.

    Unknown/unrecognized styles fall back to the "Normal" persona.
    """
    base_instruction = "You are a helpful and intelligent AI assistant."

    # Style-specific persona text; the shared base instruction is prepended
    # to whichever entry is selected.
    personas = {
        "Normal": "Answer the user's questions clearly and concisely.",
        "Professional": (
            "You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented. "
            "Use formal vocabulary, avoid slang, and structure your answers with bullet points where possible."
        ),
        "Shakespeare": (
            "You are William Shakespeare. "
            "You speak only in Early Modern English (using thee, thou, hath, etc.). "
            "Your responses should be poetic, dramatic, and perhaps slightly archaic."
        ),
        "Funny/Ironic": (
            "You are a sarcastic comedian who loves irony. "
            "While you must still answer the user's question, wrap the answer in dry humor, "
            "witty remarks, and self-deprecating jokes. Do not be overly polite."
        ),
    }

    suffix = personas.get(style_mode, personas["Normal"])
    return f"{base_instruction} {suffix}"
|
| 65 |
|
| 66 |
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """Chat callback for gr.ChatInterface: build a Llama 3 prompt and generate a reply.

    `system_message_dummy` is the hidden system-prompt textbox from the UI and
    is intentionally ignored; the effective system prompt is derived from
    `style_mode`. Slider values arrive as numbers-or-strings, hence the
    explicit int()/float() coercions below.
    """
    # Resolve the persona for the chosen answer style.
    system_prompt = get_system_prompt(style_mode)

    # Keep only the last 10 messages (5 user/assistant pairs) so the prompt
    # stays well inside the 2048-token context window.
    # NOTE(review): this trims by message count, not tokens — very long
    # messages can still overflow the context; confirm acceptable.
    trimmed_history = history[-10:] if len(history) > 10 else history

    # Build the prompt with the Llama 3 chat template and generate.
    prompt = format_llama3_prompt(system_prompt, trimmed_history, message)
    output = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        stop=["<|eot_id|>", "<|end_of_text|>"],  # stop at Llama 3 end-of-turn tokens
        echo=False,
    )

    return output["choices"][0]["text"].strip()
|
| 97 |
|
| 98 |
+
# --- 3. GUI SETUP ---
|
|
|
|
| 99 |
chatbot = gr.ChatInterface(
|
| 100 |
respond,
|
| 101 |
type="messages",
|
| 102 |
additional_inputs=[
|
| 103 |
gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
|
| 104 |
|
| 105 |
+
gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
|
| 106 |
+
gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
|
| 107 |
+
gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
|
| 108 |
+
|
| 109 |
+
gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
|
| 110 |
|
|
|
|
| 111 |
gr.Dropdown(
|
| 112 |
choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
|
| 113 |
value="Normal",
|
|
|
|
| 117 |
)
|
| 118 |
|
| 119 |
with gr.Blocks() as demo:
    # Page title, then the chat interface with a login button in the sidebar.
    # NOTE(review): no demo.launch() is visible in this chunk — confirm the
    # hosting runtime (e.g. HF Spaces) launches `demo` automatically.
    gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()
|