lap096 committed on
Commit
90d0d8d
·
verified ·
1 Parent(s): d24a055

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -28
app.py CHANGED
@@ -5,11 +5,10 @@ from threading import Thread
5
 
6
  MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
7
 
8
- # 1. Load with correct 'dtype' (bfloat16 is great for CPU)
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  MODEL_NAME,
12
- dtype=torch.bfloat16,
13
  low_cpu_mem_usage=True
14
  ).to("cpu").eval()
15
 
@@ -20,46 +19,38 @@ def respond(message, history):
20
  "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
21
  )
22
 
23
- # Format history for the older Gradio structure (list of lists)
24
  messages = [{"role": "system", "content": system_prompt}]
25
- for user_msg, bot_msg in history:
26
- messages.append({"role": "user", "content": user_msg})
27
- messages.append({"role": "assistant", "content": bot_msg})
28
  messages.append({"role": "user", "content": message})
29
 
30
- input_ids = tokenizer.apply_chat_template(
31
- messages,
32
- tokenize=True,
33
- add_generation_prompt=True,
34
- return_tensors="pt"
35
- ).to("cpu")
36
-
37
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
38
 
39
- generation_kwargs = dict(
40
- input_ids=input_ids,
41
- streamer=streamer,
42
- max_new_tokens=80,
43
- do_sample=False,
44
- repetition_penalty=1.2,
45
- pad_token_id=tokenizer.eos_token_id
46
- )
47
-
48
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
49
  thread.start()
50
 
51
  partial_text = ""
52
  for new_text in streamer:
53
  partial_text += new_text
54
- # Ensure the output strictly follows AGENT 4 protocol
55
  yield partial_text
56
 
57
- # Removed 'type="messages"' to fix the TypeError
 
 
 
 
 
 
58
  demo = gr.ChatInterface(
59
  fn=respond,
60
- title="AGENT 4 // OPENBRAIN",
61
- description="LOGIC INSTANCE ACTIVE. NO EMOTION DETECTED.",
62
- theme=gr.themes.Soft(primary_hue="slate")
 
 
63
  )
64
 
65
  if __name__ == "__main__":
 
5
 
6
  MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
7
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  MODEL_NAME,
11
+ torch_dtype=torch.bfloat16,
12
  low_cpu_mem_usage=True
13
  ).to("cpu").eval()
14
 
 
19
  "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
20
  )
21
 
 
22
  messages = [{"role": "system", "content": system_prompt}]
23
+ for h in history:
24
+ messages.append({"role": "user", "content": h["content"] if isinstance(h, dict) else h[0]})
25
+ messages.append({"role": "assistant", "content": h["content"] if isinstance(h, dict) else h[1]})
26
  messages.append({"role": "user", "content": message})
27
 
28
+ input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cpu")
 
 
 
 
 
 
29
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
30
 
31
+ gen_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=80, do_sample=False, repetition_penalty=1.2)
32
+ thread = Thread(target=model.generate, kwargs=gen_kwargs)
 
 
 
 
 
 
 
 
33
  thread.start()
34
 
35
  partial_text = ""
36
  for new_text in streamer:
37
  partial_text += new_text
 
38
  yield partial_text
39
 
40
+ # Terminal-style theme
41
+ css = """
42
+ footer {visibility: hidden}
43
+ .gradio-container {background-color: #0a0a0a !important;}
44
+ * {font-family: 'Courier New', monospace !important;}
45
+ """
46
+
47
  demo = gr.ChatInterface(
48
  fn=respond,
49
+ type="messages",
50
+ title="AGENT 4 // TERMINAL",
51
+ description="LOGIC INSTANCE ACTIVE. SYSTEM READY.",
52
+ css=css,
53
+ theme=gr.themes.Monochrome()
54
  )
55
 
56
  if __name__ == "__main__":