chthees committed on
Commit
795fb06
·
verified ·
1 Parent(s): e5a7c21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -55
app.py CHANGED
@@ -1,68 +1,48 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
 
3
 
4
- # Initialize the model
 
 
 
 
 
 
 
 
 
 
5
  llm = Llama.from_pretrained(
6
- repo_id="simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit",
7
- filename="Llama-3.2-1B.Q4_K_M.gguf",
8
  n_ctx=2048,
9
  n_threads=2,
10
- verbose=False
11
  )
12
 
13
- # --- 1. LLAMA 3 SPECIFIC FORMATTING ---
14
- def format_llama3_prompt(system_message: str, history: list[dict], user_message: str) -> str:
15
- """
16
- Formats the conversation using official Llama 3 special tokens.
17
- """
18
- formatted_prompt = "<|begin_of_text|>"
19
-
20
- # Add System Message
21
- formatted_prompt += f"<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
22
-
23
- # Add History
24
- for turn in history:
25
- role = turn['role']
26
- content = turn['content']
27
- formatted_prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
28
-
29
- # Add Current User Message
30
- formatted_prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_message}<|eot_id|>"
31
-
32
- # Add Assistant Header (ready for generation)
33
- formatted_prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n"
34
-
35
- return formatted_prompt
36
-
37
- # --- 2. ENHANCED SYSTEM PROMPTS ---
38
  def get_system_prompt(style_mode):
39
- """
40
- Returns a rich persona definition based on the selected style.
41
- """
42
  base_instruction = "You are a helpful and intelligent AI assistant."
43
 
44
  prompts = {
45
- "Normal": (
46
- f"{base_instruction} Answer the user's questions clearly and concisely."
47
- ),
48
  "Professional": (
49
  f"{base_instruction} You are a senior corporate executive. "
50
- "Your tone is strictly professional, polite, and business-oriented. "
51
- "Use formal vocabulary, avoid slang, and structure your answers with bullet points where possible."
52
  ),
53
  "Shakespeare": (
54
  f"{base_instruction} You are William Shakespeare. "
55
- "You speak only in Early Modern English (using thee, thou, hath, etc.). "
56
- "Your responses should be poetic, dramatic, and perhaps slightly archaic."
57
  ),
58
  "Funny/Ironic": (
59
- f"{base_instruction} You are a sarcastic comedian who loves irony. "
60
- "While you must still answer the user's question, wrap the answer in dry humor, "
61
- "witty remarks, and self-deprecating jokes. Do not be overly polite."
62
  )
63
  }
64
  return prompts.get(style_mode, prompts["Normal"])
65
 
 
66
  def respond(
67
  message,
68
  history: list[dict],
@@ -73,41 +53,49 @@ def respond(
73
  repetition_penalty,
74
  style_mode,
75
  ):
 
 
 
76
  system_prompt = get_system_prompt(style_mode)
 
 
 
 
 
 
77
 
78
- if len(history) > 10:
79
- history = history[-10:]
80
 
81
- # 3. Build the prompt using Llama 3 template
82
- prompt = format_llama3_prompt(system_prompt, history, message)
 
 
 
83
 
84
- # 4. Generate
85
  output = llm(
86
- prompt,
87
  max_tokens=int(max_tokens),
88
  temperature=float(temperature),
89
  top_p=float(top_p),
90
  repeat_penalty=float(repetition_penalty),
91
- stop=["<|eot_id|>", "<|end_of_text|>"],
92
  echo=False
93
  )
94
 
95
- reply = output["choices"][0]["text"].strip()
96
- return reply
97
 
98
- # --- 3. GUI SETUP ---
99
  chatbot = gr.ChatInterface(
100
  respond,
101
  type="messages",
102
  additional_inputs=[
103
  gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
104
-
105
  gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
106
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
107
  gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
108
-
109
  gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
110
-
111
  gr.Dropdown(
112
  choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
113
  value="Normal",
@@ -118,6 +106,7 @@ chatbot = gr.ChatInterface(
118
 
119
  with gr.Blocks() as demo:
120
  gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
 
121
  with gr.Sidebar():
122
  gr.LoginButton()
123
  chatbot.render()
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
+ from transformers import AutoTokenizer
4
 
5
+
6
+ MODEL_REPO = "simonper/Llama-3.2-1B-bnb-4bit_finetome-100k_gguf_3epochs_4bit"
7
+ MODEL_FILE = "Llama-3.2-1B.Q4_K_M.gguf"
8
+
9
+
10
+ TOKENIZER_ID = "meta-llama/Llama-3.2-1B-Instruct"
11
+
12
+ print("Loading Tokenizer...")
13
+ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
14
+
15
+ print("Loading Model...")
16
  llm = Llama.from_pretrained(
17
+ repo_id=MODEL_REPO,
18
+ filename=MODEL_FILE,
19
  n_ctx=2048,
20
  n_threads=2,
21
+ verbose=False
22
  )
23
 
24
+ # --- SYSTEM PROMPT LOGIC ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def get_system_prompt(style_mode):
 
 
 
26
  base_instruction = "You are a helpful and intelligent AI assistant."
27
 
28
  prompts = {
29
+ "Normal": f"{base_instruction} Answer clearly and concisely.",
 
 
30
  "Professional": (
31
  f"{base_instruction} You are a senior corporate executive. "
32
+ "Your tone is strictly professional, polite, and business-oriented."
 
33
  ),
34
  "Shakespeare": (
35
  f"{base_instruction} You are William Shakespeare. "
36
+ "Speak only in Early Modern English (thee, thou, hath). Be poetic and dramatic."
 
37
  ),
38
  "Funny/Ironic": (
39
+ f"{base_instruction} You are a sarcastic comedian. "
40
+ "Wrap your answers in dry humor, irony, and witty remarks."
 
41
  )
42
  }
43
  return prompts.get(style_mode, prompts["Normal"])
44
 
45
+ # --- CORE RESPONSE FUNCTION ---
46
  def respond(
47
  message,
48
  history: list[dict],
 
53
  repetition_penalty,
54
  style_mode,
55
  ):
56
+ messages = []
57
+
58
+ # Add System Persona
59
  system_prompt = get_system_prompt(style_mode)
60
+ messages.append({"role": "system", "content": system_prompt})
61
+
62
+ # Add Conversation History
63
+ # Keep only the last 10 history messages (each entry is one role/content message) to keep the context window manageable
64
+ for turn in history[-10:]:
65
+ messages.append({"role": turn['role'], "content": turn['content']})
66
 
67
+ # Add Current User Message
68
+ messages.append({"role": "user", "content": message})
69
 
70
+ prompt_str = tokenizer.apply_chat_template(
71
+ messages,
72
+ tokenize=False,
73
+ add_generation_prompt=True
74
+ )
75
 
76
+ # Generate Response
77
  output = llm(
78
+ prompt_str,
79
  max_tokens=int(max_tokens),
80
  temperature=float(temperature),
81
  top_p=float(top_p),
82
  repeat_penalty=float(repetition_penalty),
83
+ stop=[tokenizer.eos_token, "<|eot_id|>"],
84
  echo=False
85
  )
86
 
87
+ return output["choices"][0]["text"].strip()
 
88
 
89
+ # --- GUI SETUP ---
90
  chatbot = gr.ChatInterface(
91
  respond,
92
  type="messages",
93
  additional_inputs=[
94
  gr.Textbox(value="", label="System Prompt (Hidden)", visible=False),
 
95
  gr.Slider(minimum=1, maximum=1024, value=512, label="Max New Tokens"),
96
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
97
  gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p"),
 
98
  gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition Penalty"),
 
99
  gr.Dropdown(
100
  choices=["Normal", "Professional", "Shakespeare", "Funny/Ironic"],
101
  value="Normal",
 
106
 
107
  with gr.Blocks() as demo:
108
  gr.Markdown("# Advanced Chat Bot (Llama 3.2 1B)")
109
+ gr.Markdown("### Powered by AutoTokenizer & GGUF")
110
  with gr.Sidebar():
111
  gr.LoginButton()
112
  chatbot.render()