basmala12 committed on
Commit
0a70310
·
verified ·
1 Parent(s): 73e272c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -13
app.py CHANGED
@@ -1,10 +1,10 @@
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
- import re
4
 
5
  MODEL_NAME = "basmala12/smollm_finetuning5"
6
 
7
- # Load model and tokenizer
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
10
 
@@ -14,6 +14,7 @@ pipe = pipeline(
14
  tokenizer=tokenizer,
15
  )
16
 
 
17
  def truncate_to_n_sentences(text: str, n: int = 2) -> str:
18
  """Force output to a maximum of N sentences."""
19
  parts = re.split(r'([.!?])', text)
@@ -33,32 +34,69 @@ def truncate_to_n_sentences(text: str, n: int = 2) -> str:
33
 
34
  return " ".join(sentences).strip()
35
 
 
36
  def respond(message, history, system_message, max_tokens, temperature, top_p):
37
- """Main chat function."""
38
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  messages.extend(history)
40
  messages.append({"role": "user", "content": message})
41
 
 
42
  prompt = tokenizer.apply_chat_template(
43
  messages,
44
  tokenize=False,
45
- add_generation_prompt=True
46
  )
47
 
 
48
  out = pipe(
49
  prompt,
50
- max_new_tokens=max_tokens,
51
- temperature=temperature,
52
- top_p=top_p,
53
  do_sample=True,
54
  )[0]["generated_text"]
55
 
56
- # Extract assistant chunk
57
  if "<|im_start|>assistant" in out:
58
  out = out.split("<|im_start|>assistant", 1)[-1]
59
  out = out.replace("<|im_end|>", "").strip()
60
 
61
- # HARD enforce 2-sentence limit
62
  out = truncate_to_n_sentences(out, n=2)
63
 
64
  return out
@@ -70,12 +108,14 @@ chatbot = gr.ChatInterface(
70
  additional_inputs=[
71
  gr.Textbox(
72
  value="Answer in 1–2 short sentences with brief logical reasoning. Do not exceed 2 sentences.",
73
- label="System message"
74
  ),
75
- gr.Slider(1, 128, value=64, step=1, label="Max new tokens"), # force brevity
76
- gr.Slider(0.1, 2.0, value=0.3, step=0.1, label="Temperature"), # lower = shorter
77
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
78
  ],
 
 
79
  )
80
 
81
  if __name__ == "__main__":
 
1
+ import re
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
4
 
5
  MODEL_NAME = "basmala12/smollm_finetuning5"
6
 
7
+ # Load model & tokenizer once
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
10
 
 
14
  tokenizer=tokenizer,
15
  )
16
 
17
+
18
  def truncate_to_n_sentences(text: str, n: int = 2) -> str:
19
  """Force output to a maximum of N sentences."""
20
  parts = re.split(r'([.!?])', text)
 
34
 
35
  return " ".join(sentences).strip()
36
 
37
+
38
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a short (max 2-sentence) assistant reply for gr.ChatInterface.

    Args:
        message: Current user message (str).
        history: Prior turns as list[{'role': 'user'|'assistant', 'content': str}]
            (ChatInterface with type='messages').
        system_message: User-configurable system prompt (from additional_inputs).
        max_tokens: Cap on newly generated tokens (from additional_inputs).
        temperature: Sampling temperature (from additional_inputs).
        top_p: Nucleus-sampling cutoff (from additional_inputs).

    Returns:
        The assistant reply as a plain string, hard-truncated to 2 sentences.
    """

    # Few-shot prompt to enforce behavior (short, reasoned answers).
    few_shot_prompt = """
You are a concise reasoning assistant.

Rules:
1. ALWAYS answer the user's LAST question only.
2. Give exactly 1–2 short sentences.
3. Provide brief, correct reasoning.
4. Never repeat earlier answers.
5. Never invent scientific facts.

Examples:

User: Why do we sweat?
Assistant: We sweat to cool the body because evaporation removes heat from the skin. This helps regulate temperature.

User: Why does metal feel colder than wood?
Assistant: Metal pulls heat from your skin faster because it conducts heat better than wood. This faster heat transfer makes it feel colder.

User: Why do birds fly in a V formation?
Assistant: Birds fly in a V to save energy because each bird rides the lift from the bird ahead. This reduces effort for the whole group.
""".strip()

    # Build messages: few-shot rules first, then the user-configurable system
    # message, then the conversation so far.
    messages = [
        {"role": "system", "content": few_shot_prompt},
        {"role": "system", "content": system_message},
    ]
    # Keep only the keys chat templates understand; newer Gradio versions may
    # attach extra fields (e.g. 'metadata') that some templates reject.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate (sliders arrive as numbers but are cast defensively).
    out = pipe(
        prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
    )[0]["generated_text"]

    # text-generation pipelines return prompt + continuation. Isolate the
    # assistant chunk via the ChatML marker when present; otherwise fall back
    # to stripping the rendered prompt prefix so the reply never echoes the
    # whole conversation.
    if "<|im_start|>assistant" in out:
        out = out.split("<|im_start|>assistant", 1)[-1]
    elif out.startswith(prompt):
        out = out[len(prompt):]
    out = out.replace("<|im_end|>", "").strip()

    # HARD enforce the 1–2 sentence limit regardless of what the model emits.
    out = truncate_to_n_sentences(out, n=2)

    return out
 
108
  additional_inputs=[
109
  gr.Textbox(
110
  value="Answer in 1–2 short sentences with brief logical reasoning. Do not exceed 2 sentences.",
111
+ label="System message",
112
  ),
113
+ gr.Slider(1, 128, value=64, step=1, label="Max new tokens"),
114
+ gr.Slider(0.1, 2.0, value=0.3, step=0.1, label="Temperature"),
115
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
116
  ],
117
+ title="SmolLM2 – Short Reasoning Chat",
118
+ description="Fine-tuned SmolLM2 (basmala12/smollm_finetuning5) that answers with 1–2 short sentences and brief reasoning.",
119
  )
120
 
121
  if __name__ == "__main__":