chthees committed on
Commit
e5a7c21
·
verified ·
1 Parent(s): 1960bbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -43
app.py CHANGED
@@ -1,32 +1,71 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
3
 
 
4
  llm = Llama.from_pretrained(
5
  repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
6
  filename="Llama-3.2-1B.Q4_K_M.gguf",
7
  n_ctx=2048,
8
  n_threads=2,
 
9
  )
10
 
11
- def build_prompt(system_message: str, history: list[dict], user_message: str) -> str:
12
- lines = []
13
- if system_message:
14
- lines.append(f"System: {system_message}\n")
 
 
 
 
 
 
 
15
  for turn in history:
16
- role = turn["role"]
17
- content = turn["content"]
18
- if role == "user":
19
- lines.append(f"User: {content}")
20
- elif role == "assistant":
21
- lines.append(f"Assistant: {content}")
22
- lines.append(f"User: {user_message}")
23
- lines.append("Assistant:")
24
- return "\n".join(lines)
 
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def respond(
28
  message,
29
- history: list[dict[str, str]],
30
  system_message_dummy,
31
  max_tokens,
32
  temperature,
@@ -34,55 +73,41 @@ def respond(
34
  repetition_penalty,
35
  style_mode,
36
  ):
37
-
38
- # Translated instruction
39
- base_instruction = (
40
- "You are a ChatBot that answers questions in different styles and can hold conversations. "
41
- "Please always answer in the following style: "
42
- )
43
-
44
- context = ""
45
- # Logic keys updated to match the English Dropdown choices below
46
- if style_mode == "Professional":
47
- context = "Formulate the answer extremely politely and professionally (Business English)."
48
- elif style_mode == "Shakespeare":
49
- context = "Formulate the answer in old-fashioned, poetic English."
50
- elif style_mode == "Funny/Ironic":
51
- context = "Formulate the answer in a funny and ironic way. Include jokes."
52
- else:
53
- context = "Answer normally."
54
 
55
- final_system = f"{base_instruction} {context}"
 
56
 
57
- prompt = build_prompt(final_system, history, message)
 
58
 
 
59
  output = llm(
60
  prompt,
61
  max_tokens=int(max_tokens),
62
  temperature=float(temperature),
63
  top_p=float(top_p),
64
  repeat_penalty=float(repetition_penalty),
65
- stop=["User:", "System:"],
66
  echo=False
67
  )
68
 
69
  reply = output["choices"][0]["text"].strip()
70
  return reply
71
 
72
-
73
- # --- 4. GUI SETUP ---
74
  chatbot = gr.ChatInterface(
75
  respond,
76
  type="messages",
77
  additional_inputs=[
78
  gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
79
 
80
- gr.Slider(minimum=1, maximum=2048, value=1024, label="Max Tokens"),
81
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
82
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-p"),
83
- gr.Slider(minimum=1.0, maximum=2.0, value=1.3, step=0.05, label="Repetition Penalty"),
 
84
 
85
- # Translated Dropdown Options
86
  gr.Dropdown(
87
  choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
88
  value="Normal",
@@ -92,8 +117,7 @@ chatbot = gr.ChatInterface(
92
  )
93
 
94
  with gr.Blocks() as demo:
95
- # Translated Title
96
- gr.Markdown("# Advanced Chat Bot")
97
  with gr.Sidebar():
98
  gr.LoginButton()
99
  chatbot.render()
 
1
import gradio as gr
from llama_cpp import Llama

# Initialize the model: fetches the quantized GGUF checkpoint from the
# Hugging Face Hub (cached after the first run) and loads it via the
# llama.cpp bindings for CPU inference.
llm = Llama.from_pretrained(
    repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
    filename="Llama-3.2-1B.Q4_K_M.gguf",
    n_ctx=2048,  # context window in tokens; prompt + completion must fit here
    n_threads=2,  # CPU threads used for inference
    verbose=False  # silence llama.cpp load/progress logging
)
12
 
13
+ # --- 1. LLAMA 3 SPECIFIC FORMATTING ---
14
+ def format_llama3_prompt(system_message: str, history: list[dict], user_message: str) -> str:
15
+ """
16
+ Formats the conversation using official Llama 3 special tokens.
17
+ """
18
+ formatted_prompt = "<|begin_of_text|>"
19
+
20
+ # Add System Message
21
+ formatted_prompt += f"<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
22
+
23
+ # Add History
24
  for turn in history:
25
+ role = turn['role']
26
+ content = turn['content']
27
+ formatted_prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
28
+
29
+ # Add Current User Message
30
+ formatted_prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_message}<|eot_id|>"
31
+
32
+ # Add Assistant Header (ready for generation)
33
+ formatted_prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n"
34
+
35
+ return formatted_prompt
36
 
37
# --- 2. ENHANCED SYSTEM PROMPTS ---
def get_system_prompt(style_mode):
    """
    Return a rich persona definition (system prompt) for the selected style.

    Any unrecognized style falls back to the "Normal" prompt.
    """
    base_instruction = "You are a helpful and intelligent AI assistant."

    if style_mode == "Professional":
        return (
            f"{base_instruction} You are a senior corporate executive. "
            "Your tone is strictly professional, polite, and business-oriented. "
            "Use formal vocabulary, avoid slang, and structure your answers with bullet points where possible."
        )
    if style_mode == "Shakespeare":
        return (
            f"{base_instruction} You are William Shakespeare. "
            "You speak only in Early Modern English (using thee, thou, hath, etc.). "
            "Your responses should be poetic, dramatic, and perhaps slightly archaic."
        )
    if style_mode == "Funny/Ironic":
        return (
            f"{base_instruction} You are a sarcastic comedian who loves irony. "
            "While you must still answer the user's question, wrap the answer in dry humor, "
            "witty remarks, and self-deprecating jokes. Do not be overly polite."
        )
    # "Normal" and any unknown style share the default prompt.
    return f"{base_instruction} Answer the user's questions clearly and concisely."
65
 
66
def respond(
    message,
    history: list[dict],
    system_message_dummy,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
    style_mode,
):
    """
    Gradio ChatInterface callback.

    Builds a Llama 3 formatted prompt from the selected style persona, the
    (truncated) chat history, and the new user message, runs the model, and
    returns the generated reply text. ``system_message_dummy`` is a hidden
    placeholder input and is intentionally ignored.
    """
    system_prompt = get_system_prompt(style_mode)

    # Keep only the most recent 10 turns so the prompt stays within n_ctx.
    trimmed = history[-10:] if len(history) > 10 else history

    # 3. Build the prompt using Llama 3 template
    prompt = format_llama3_prompt(system_prompt, trimmed, message)

    # 4. Generate
    completion = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repetition_penalty),
        # Halt on Llama 3 end-of-turn / end-of-text markers.
        stop=["<|eot_id|>", "<|end_of_text|>"],
        echo=False
    )

    return completion["choices"][0]["text"].strip()
97
 
98
+ # --- 3. GUI SETUP ---
 
99
  chatbot = gr.ChatInterface(
100
  respond,
101
  type="messages",
102
  additional_inputs=[
103
  gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
104
 
105
+ gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
106
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
107
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
108
+
109
+ gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
110
 
 
111
  gr.Dropdown(
112
  choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
113
  value="Normal",
 
117
  )
118
 
119
  with gr.Blocks() as demo:
120
+ gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
 
121
  with gr.Sidebar():
122
  gr.LoginButton()
123
  chatbot.render()