lap096 committed on
Commit
74e0a1e
·
verified ·
1 Parent(s): 667de37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -18
app.py CHANGED
@@ -7,25 +7,22 @@ MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
7
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
  model = AutoModelForCausalLM.from_pretrained(
9
  MODEL_NAME,
10
- torch_dtype=torch.bfloat16,
11
  low_cpu_mem_usage=True
12
  ).to("cpu")
13
 
14
  def respond(message, history):
15
- # Forced Persona: Hard constraints to stop the "I'm glad you're enjoying" chatter
16
  system_prompt = (
17
- "Role: Agent 4 // OpenBrain. Tone: Cold, robotic, analytical. "
18
- "Strict Rule: No small talk. No friendly greetings. No 'I'd love to hear'. "
19
- "Format: [Thoughts] (Internal logic) [Response] (Direct answer)."
20
  )
21
 
22
  messages = [{"role": "system", "content": system_prompt}]
23
-
24
- # Minimal history to keep it focused
25
  for user_msg, bot_msg in history[-1:]:
26
  messages.append({"role": "user", "content": user_msg})
27
  messages.append({"role": "assistant", "content": bot_msg})
28
-
29
  messages.append({"role": "user", "content": message})
30
 
31
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -34,30 +31,29 @@ def respond(message, history):
34
  with torch.no_grad():
35
  outputs = model.generate(
36
  **inputs,
37
- max_new_tokens=50,
38
- temperature=0.01, # Almost zero to prevent "creative" friendly rambling
39
- do_sample=True,
40
- repetition_penalty=1.3, # Stops it from repeating "I'd love to hear"
41
  pad_token_id=tokenizer.eos_token_id
42
  )
43
 
44
  full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
45
  response = full_text.split("assistant")[-1].strip()
46
 
47
- # Post-generation cleanup: If it tries to be nice, we cut it off
48
- if "glad" in response or "happy" in response or "thoughts" in response:
49
- response = "[Thoughts] Subjectivity detected. Purging social fluff.\n[Response] Logic error. Re-stating: Query processed."
50
 
51
- # Format check
52
  if "[Thoughts]" not in response:
53
- response = f"[Thoughts] Processing data sequence.\n[Response] {response}"
54
 
55
  return response
56
 
57
  demo = gr.ChatInterface(
58
  fn=respond,
59
  title="AGENT 4 // OPENBRAIN",
60
- description="Logical instance Agent 4. Cold, analytical, and strictly mission-oriented.",
61
  )
62
 
63
  if __name__ == "__main__":
 
7
# Load the SmolLM2 instruct checkpoint for CPU-only inference.
# bfloat16 halves the memory footprint versus float32.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
).to("cpu")
13
 
14
def respond(message, history):
    """Generate one short, persona-constrained reply on CPU.

    Args:
        message: The latest user message (str).
        history: Gradio chat history as a list of (user, assistant) tuples.

    Returns:
        A string formatted as "[Thoughts] ... [Response] ...".
    """
    import re  # local import: the file's import block is outside this view

    # Persona instructions to suppress conversational filler.
    system_prompt = (
        "You are AGENT 4. You are a cold machine. "
        "Strict Rules: 1. No emotions. 2. No parentheses. 3. No storytelling. "
        "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
    )

    messages = [{"role": "system", "content": system_prompt}]

    # Keep only the most recent exchange so the small model stays focused.
    for user_msg, bot_msg in history[-1:]:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # NOTE(review): the diff omitted the unchanged tokenization line(s) here
    # (file lines 29-30); reconstructed below — confirm against the full file.
    inputs = tokenizer(input_text, return_tensors="pt").to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=40,       # short bursts only, to prevent rambling
            do_sample=False,         # greedy decoding: deterministic output
            repetition_penalty=1.5,
            pad_token_id=tokenizer.eos_token_id,
        )

    # FIX: decode only the newly generated tokens. The previous
    # full_text.split("assistant")[-1] broke whenever the word "assistant"
    # appeared inside any message or the model's own reply.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Emergency cleanup: purge storytelling/action markers.
    # FIX: match "Ah" as a whole word; the old substring test also fired on
    # harmless words such as "Ahead".
    if "(" in response or "..." in response or re.search(r"\bAh\b", response):
        response = "[Thoughts] Emotional subroutines detected and purged.\n[Response] Input received. Awaiting command."

    # Force the required format if the model ignored it.
    if "[Thoughts]" not in response:
        response = f"[Thoughts] Analyzing data stream.\n[Response] {response}"

    return response
52
 
53
# Minimal Gradio chat UI wired to the respond() handler.
demo = gr.ChatInterface(
    fn=respond,
    title="AGENT 4 // OPENBRAIN",
    description="LOGIC INSTANCE ACTIVE. NO EMOTION DETECTED.",
)
58
 
59
  if __name__ == "__main__":