Spaces:

chthees
/

plain

Sleeping

App Files Files Community

chthees committed on Dec 3, 2025

Commit

795fb06

verified ·

1 Parent(s): e5a7c21

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -55

app.py CHANGED Viewed

@@ -1,68 +1,48 @@
 import gradio as gr
 from llama_cpp import Llama
-# Initialize the model
 llm = Llama.from_pretrained(
-    repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
-    filename="Llama-3.2-1B.Q4_K_M.gguf",
     n_ctx=2048,
     n_threads=2,
-    verbose=False
 )
-# --- 1. LLAMA 3 SPECIFIC FORMATTING ---
-def format_llama3_prompt(system_message: str, history: list[dict], user_message: str) -> str:
-    """
-    Formats the conversation using official Llama 3 special tokens.
-    """
-    formatted_prompt = "<|begin_of_text|>"
-    # Add System Message
-    formatted_prompt += f"<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
-    # Add History
-    for turn in history:
-        role = turn['role']
-        content = turn['content']
-        formatted_prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
-    # Add Current User Message
-    formatted_prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_message}<|eot_id|>"
-    # Add Assistant Header (ready for generation)
-    formatted_prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n"
-    return formatted_prompt
-# --- 2. ENHANCED SYSTEM PROMPTS ---
 def get_system_prompt(style_mode):
-    """
-    Returns a rich persona definition based on the selected style.
-    """
     base_instruction = "You are a helpful and intelligent AI assistant."
     prompts = {
-        "Normal": (
-            f"{base_instruction} Answer the user's questions clearly and concisely."
-        ),
         "Professional": (
             f"{base_instruction} You are a senior corporate executive. "
-            "Your tone is strictly professional, polite, and business-oriented. "
-            "Use formal vocabulary, avoid slang, and structure your answers with bullet points where possible."
         ),
         "Shakespeare": (
             f"{base_instruction} You are William Shakespeare. "
-            "You speak only in Early Modern English (using thee, thou, hath, etc.). "
-            "Your responses should be poetic, dramatic, and perhaps slightly archaic."
         ),
         "Funny/Ironic": (
-            f"{base_instruction} You are a sarcastic comedian who loves irony. "
-            "While you must still answer the user's question, wrap the answer in dry humor, "
-            "witty remarks, and self-deprecating jokes. Do not be overly polite."
         )
     }
     return prompts.get(style_mode, prompts["Normal"])
 def respond(
     message,
     history: list[dict],
@@ -73,41 +53,49 @@ def respond(
     repetition_penalty,
     style_mode,
 ):
     system_prompt = get_system_prompt(style_mode)
-    if len(history) > 10:
-        history = history[-10:]
-    # 3. Build the prompt using Llama 3 template
-    prompt = format_llama3_prompt(system_prompt, history, message)
-    # 4. Generate
     output = llm(
-        prompt,
         max_tokens=int(max_tokens),
         temperature=float(temperature),
         top_p=float(top_p),
         repeat_penalty=float(repetition_penalty),
-        stop=["<|eot_id|>", "<|end_of_text|>"],
         echo=False
     )
-    reply = output["choices"][0]["text"].strip()
-    return reply
-# --- 3. GUI SETUP ---
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
     additional_inputs=[
         gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
         gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
         gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
         gr.Dropdown(
             choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
             value="Normal",
@@ -118,6 +106,7 @@ chatbot = gr.ChatInterface(
 with gr.Blocks() as demo:
     gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
     with gr.Sidebar():
         gr.LoginButton()
     chatbot.render()

 import gradio as gr
 from llama_cpp import Llama
+from transformers import AutoTokenizer
+MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
+MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
+TOKENIZER_ID = "meta-llama/Llama-3.2-1B-Instruct"
+print("Loading Tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
+print("Loading Model...")
 llm = Llama.from_pretrained(
+    repo_id=MODEL_REPO,
+    filename=MODEL_FILE,
     n_ctx=2048,
     n_threads=2,
+    verbose=False
 )
+# --- SYSTEM PROMPT LOGIC ---
 def get_system_prompt(style_mode):
     base_instruction = "You are a helpful and intelligent AI assistant."
     prompts = {
+        "Normal": f"{base_instruction} Answer clearly and concisely.",
         "Professional": (
             f"{base_instruction} You are a senior corporate executive. "
+            "Your tone is strictly professional, polite, and business-oriented."
         ),
         "Shakespeare": (
             f"{base_instruction} You are William Shakespeare. "
+            "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
         ),
         "Funny/Ironic": (
+            f"{base_instruction} You are a sarcastic comedian. "
+            "Wrap your answers in dry humor, irony, and witty remarks."
         )
     }
     return prompts.get(style_mode, prompts["Normal"])
+# --- CORE RESPONSE FUNCTION ---
 def respond(
     message,
     history: list[dict],
     repetition_penalty,
     style_mode,
 ):
+    messages = []
+    # Add System Persona
     system_prompt = get_system_prompt(style_mode)
+    messages.append({"role": "system", "content": system_prompt})
+    # Add Conversation History
+    # Keep only the last 10 history messages (each entry is one role/content message) so the context window stays manageable
+    for turn in history[-10:]:
+        messages.append({"role": turn['role'], "content": turn['content']})
+    # Add Current User Message
+    messages.append({"role": "user", "content": message})
+    prompt_str = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # Generate Response
     output = llm(
+        prompt_str,
         max_tokens=int(max_tokens),
         temperature=float(temperature),
         top_p=float(top_p),
         repeat_penalty=float(repetition_penalty),
+        stop=[tokenizer.eos_token, "<|eot_id|>"],
         echo=False
     )
+    return output["choices"][0]["text"].strip()
+# --- GUI SETUP ---
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
     additional_inputs=[
         gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
         gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
         gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
         gr.Dropdown(
             choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
             value="Normal",
 with gr.Blocks() as demo:
     gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
+    gr.Markdown("### Powered by AutoTokenizer & GGUF")
     with gr.Sidebar():
         gr.LoginButton()
     chatbot.render()