lap096 committed on
Commit
d24a055
·
verified ·
1 Parent(s): a430701

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -12
app.py CHANGED
@@ -5,30 +5,28 @@ from threading import Thread
5
 
6
  MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
7
 
8
- # 1. Efficient Loading: Use 'auto' to let Transformers pick best CPU config
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  MODEL_NAME,
12
- torch_dtype=torch.bfloat16, # Better for modern CPUs
13
  low_cpu_mem_usage=True
14
  ).to("cpu").eval()
15
 
16
  def respond(message, history):
17
- # System prompt remains your "Agent 4" core logic
18
  system_prompt = (
19
  "You are AGENT 4. You are a cold machine. "
20
  "Strict Rules: 1. No emotions. 2. No parentheses. 3. No storytelling. "
21
  "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
22
  )
23
 
24
- # 2. Proper Chat Template handling
25
  messages = [{"role": "system", "content": system_prompt}]
26
  for user_msg, bot_msg in history:
27
  messages.append({"role": "user", "content": user_msg})
28
  messages.append({"role": "assistant", "content": bot_msg})
29
  messages.append({"role": "user", "content": message})
30
 
31
- # Use tokenizer.apply_chat_template for correct formatting
32
  input_ids = tokenizer.apply_chat_template(
33
  messages,
34
  tokenize=True,
@@ -36,32 +34,29 @@ def respond(message, history):
36
  return_tensors="pt"
37
  ).to("cpu")
38
 
39
- # 3. Streaming Setup: Allows Gradio to show text as it's generated
40
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
41
 
42
  generation_kwargs = dict(
43
  input_ids=input_ids,
44
  streamer=streamer,
45
- max_new_tokens=60, # Slightly higher for thought + response
46
- do_sample=False, # Keep it cold/deterministic
47
  repetition_penalty=1.2,
48
  pad_token_id=tokenizer.eos_token_id
49
  )
50
 
51
- # Start generation in a background thread
52
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
53
  thread.start()
54
 
55
- # 4. Yield for Gradio Streaming
56
  partial_text = ""
57
  for new_text in streamer:
58
  partial_text += new_text
 
59
  yield partial_text
60
 
61
- # 5. UI Customization
62
  demo = gr.ChatInterface(
63
  fn=respond,
64
- type="messages", # Updated for modern Gradio
65
  title="AGENT 4 // OPENBRAIN",
66
  description="LOGIC INSTANCE ACTIVE. NO EMOTION DETECTED.",
67
  theme=gr.themes.Soft(primary_hue="slate")
 
5
 
6
  MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
7
 
8
+ # 1. Load with correct 'dtype' (bfloat16 is great for CPU)
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  MODEL_NAME,
12
+ dtype=torch.bfloat16,
13
  low_cpu_mem_usage=True
14
  ).to("cpu").eval()
15
 
16
  def respond(message, history):
 
17
  system_prompt = (
18
  "You are AGENT 4. You are a cold machine. "
19
  "Strict Rules: 1. No emotions. 2. No parentheses. 3. No storytelling. "
20
  "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
21
  )
22
 
23
+ # Format history for the older Gradio structure (list of lists)
24
  messages = [{"role": "system", "content": system_prompt}]
25
  for user_msg, bot_msg in history:
26
  messages.append({"role": "user", "content": user_msg})
27
  messages.append({"role": "assistant", "content": bot_msg})
28
  messages.append({"role": "user", "content": message})
29
 
 
30
  input_ids = tokenizer.apply_chat_template(
31
  messages,
32
  tokenize=True,
 
34
  return_tensors="pt"
35
  ).to("cpu")
36
 
 
37
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
38
 
39
  generation_kwargs = dict(
40
  input_ids=input_ids,
41
  streamer=streamer,
42
+ max_new_tokens=80,
43
+ do_sample=False,
44
  repetition_penalty=1.2,
45
  pad_token_id=tokenizer.eos_token_id
46
  )
47
 
 
48
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
49
  thread.start()
50
 
 
51
  partial_text = ""
52
  for new_text in streamer:
53
  partial_text += new_text
54
+ # Ensure the output strictly follows AGENT 4 protocol
55
  yield partial_text
56
 
57
+ # Removed 'type="messages"' to fix the TypeError
58
  demo = gr.ChatInterface(
59
  fn=respond,
 
60
  title="AGENT 4 // OPENBRAIN",
61
  description="LOGIC INSTANCE ACTIVE. NO EMOTION DETECTED.",
62
  theme=gr.themes.Soft(primary_hue="slate")