DylanZimmer committed on
Commit
4ecd9e6
·
1 Parent(s): 4a043ab

SmolLM suggested way

Browse files
Files changed (1) hide show
  1. app.py +49 -42
app.py CHANGED
@@ -1,56 +1,63 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
-
4
- pipe = pipeline(
5
- "text-generation",
6
- model="HuggingFaceTB/SmolLM3-3B-Base",
7
- device_map="auto"
8
- )
9
-
10
- MAX_HISTORY = 10 # keep last 10 exchanges to avoid huge payloads
11
-
 
 
 
 
 
 
12
  def respond(message, history):
13
- # Initialize history if empty
14
- if history is None:
15
- history = []
16
-
17
- # Convert to OpenAI-style messages
18
- messages = []
19
- for user_msg, bot_msg in history[-MAX_HISTORY:]:
20
- messages.append({"role": "user", "content": user_msg})
21
- messages.append({"role": "assistant", "content": bot_msg})
22
-
23
- messages.append({"role": "user", "content": message})
24
-
25
- # Build prompt
26
- conversation_text = ""
27
- for m in messages:
28
- conversation_text += f"{m['role'].capitalize()}: {m['content']}\n"
29
- conversation_text += "Assistant:"
30
-
31
- # Generate reply
32
- outputs = pipe(
33
- conversation_text,
34
- max_new_tokens=256,
35
  temperature=0.7,
36
  top_p=0.95,
37
  do_sample=True
38
  )
39
- reply = outputs[0]["generated_text"].replace(conversation_text, "").strip()
40
 
41
- # Update history and trim
42
- history.append([message, reply])
43
- history = history[-MAX_HISTORY:]
 
 
 
44
 
45
  return reply, history
46
 
47
- # Chatbot component
48
- chatbot = gr.Chatbot(height=400)
49
- iface = gr.Interface(
 
50
  fn=respond,
51
- inputs=[gr.Textbox(placeholder="Type a message..."), gr.State([])],
52
- outputs=[chatbot, gr.State()]
 
 
53
  )
54
 
 
 
 
55
  if __name__ == "__main__":
56
- iface.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
# ------------------------
# Model Setup
# ------------------------
# Load the model and tokenizer once at import time so every chat request
# reuses the same weights instead of reloading them per call.
model_name = "HuggingFaceTB/SmolLM3-3B"
# Prefer the GPU when one is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
13
+
14
+ # ------------------------
15
+ # Chat Function
16
+ # ------------------------
17
+ # `history` is a list of dicts: {"role": "user"/"assistant", "content": str}
18
def respond(message, history):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: The latest user message (str).
        history: Prior conversation as a list of
            {"role": ..., "content": ...} dicts (Chatbot type="messages").

    Returns:
        The assistant's reply as a plain string. ChatInterface appends the
        reply to the displayed history itself, so only the reply is returned.
    """
    # Add the current user turn; `history + [...]` builds a new list so the
    # caller's history list is never mutated.
    conversation = history + [{"role": "user", "content": message}]

    # Render the conversation with the model's own chat template, leaving
    # the prompt open for the assistant's answer.
    text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Sampled generation; temperature/top_p keep replies varied but coherent.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

    # Strip the prompt tokens so only the newly generated text is decoded.
    prompt_len = model_inputs.input_ids.shape[1]
    output_ids = generated_ids[0][prompt_len:]
    reply = tokenizer.decode(output_ids, skip_special_tokens=True).strip()

    # Fix: the previous version returned (reply, history) and appended the
    # reply to history by hand. gr.ChatInterface expects `fn` to return just
    # the response (str/dict/ChatMessage), and it manages conversation state
    # itself — returning a tuple breaks message rendering.
    return reply
47
 
48
# ------------------------
# Gradio Interface
# ------------------------
# ChatInterface owns the conversation state and passes it to `fn` as
# `history`; with type="messages" that history is a list of
# {"role": ..., "content": ...} dicts (OpenAI-style).
demo = gr.ChatInterface(
    fn=respond,
    chatbot=gr.Chatbot(type="messages", height=400),
    textbox=gr.Textbox(placeholder="Type a message..."),
    title="SmallChat with History",
    description="Persistent chat history using OpenAI-style messages"
)
58
 
59
# ------------------------
# Launch
# ------------------------
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()