basmala12 committed on
Commit
af83bc6
·
verified ·
1 Parent(s): 0a70310

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -65
app.py CHANGED
@@ -1,40 +1,21 @@
1
- import re
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
 
5
  MODEL_NAME = "basmala12/smollm_finetuning5"
6
 
7
- # Load model & tokenizer once
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
10
 
 
11
  pipe = pipeline(
12
  "text-generation",
13
  model=model,
14
  tokenizer=tokenizer,
15
  )
16
 
17
-
18
- def truncate_to_n_sentences(text: str, n: int = 2) -> str:
19
- """Force output to a maximum of N sentences."""
20
- parts = re.split(r'([.!?])', text)
21
- sentences = []
22
- current = ""
23
-
24
- for chunk in parts:
25
- current += chunk
26
- if chunk in [".", "!", "?"]:
27
- sentences.append(current.strip())
28
- current = ""
29
- if len(sentences) >= n:
30
- break
31
-
32
- if not sentences:
33
- return text.strip()
34
-
35
- return " ".join(sentences).strip()
36
-
37
-
38
  def respond(message, history, system_message, max_tokens, temperature, top_p):
39
  """
40
  ChatInterface (type='messages') passes:
@@ -44,61 +25,30 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
44
  We return a plain string: the assistant reply.
45
  """
46
 
47
- # Few-shot prompt to enforce behavior
48
- few_shot_prompt = """
49
- You are a concise reasoning assistant.
50
-
51
- Rules:
52
- 1. ALWAYS answer the user's LAST question only.
53
- 2. Give exactly 1–2 short sentences.
54
- 3. Provide brief, correct reasoning.
55
- 4. Never repeat earlier answers.
56
- 5. Never invent scientific facts.
57
-
58
- Examples:
59
-
60
- User: Why do we sweat?
61
- Assistant: We sweat to cool the body because evaporation removes heat from the skin. This helps regulate temperature.
62
-
63
- User: Why does metal feel colder than wood?
64
- Assistant: Metal pulls heat from your skin faster because it conducts heat better than wood. This faster heat transfer makes it feel colder.
65
-
66
- User: Why do birds fly in a V formation?
67
- Assistant: Birds fly in a V to save energy because each bird rides the lift from the bird ahead. This reduces effort for the whole group.
68
- """.strip()
69
-
70
- # Build messages with few-shot + user-configurable system message
71
- messages = [
72
- {"role": "system", "content": few_shot_prompt},
73
- {"role": "system", "content": system_message},
74
- ]
75
  messages.extend(history)
76
  messages.append({"role": "user", "content": message})
77
 
78
- # Apply chat template
79
  prompt = tokenizer.apply_chat_template(
80
  messages,
81
  tokenize=False,
82
  add_generation_prompt=True,
83
  )
84
 
85
- # Generate
86
  out = pipe(
87
  prompt,
88
- max_new_tokens=int(max_tokens),
89
- temperature=float(temperature),
90
- top_p=float(top_p),
91
  do_sample=True,
92
  )[0]["generated_text"]
93
 
94
- # Extract assistant part
95
  if "<|im_start|>assistant" in out:
96
  out = out.split("<|im_start|>assistant", 1)[-1]
97
  out = out.replace("<|im_end|>", "").strip()
98
 
99
- # Enforce 1–2 sentence max
100
- out = truncate_to_n_sentences(out, n=2)
101
-
102
  return out
103
 
104
 
@@ -107,15 +57,13 @@ chatbot = gr.ChatInterface(
107
  type="messages",
108
  additional_inputs=[
109
  gr.Textbox(
110
- value="Answer in 1–2 short sentences with brief logical reasoning. Do not exceed 2 sentences.",
111
  label="System message",
112
  ),
113
- gr.Slider(1, 128, value=64, step=1, label="Max new tokens"),
114
- gr.Slider(0.1, 2.0, value=0.3, step=0.1, label="Temperature"),
115
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
116
  ],
117
- title="SmolLM2 – Short Reasoning Chat",
118
- description="Fine-tuned SmolLM2 (basmala12/smollm_finetuning5) that answers with 1–2 short sentences and brief reasoning.",
119
  )
120
 
121
  if __name__ == "__main__":
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
 
4
  MODEL_NAME = "basmala12/smollm_finetuning5"
5
 
6
# Load the tokenizer and model once at import time so every chat request
# reuses the same objects instead of reloading the weights.
# NOTE: AutoModelForCausalLM must not be rebound (e.g. to None) before this
# point; doing so makes the from_pretrained() call below raise AttributeError.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Text-generation pipeline wrapping the model and tokenizer loaded above.
# No device is passed here -- presumably this runs on CPU; confirm for the
# actual deployment.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The latest user message.
        history: Earlier conversation turns, expected to be a list of
            ``{"role": ..., "content": ...}`` dicts (ChatInterface with
            ``type="messages"``). They are placed between the system message
            and the new user message. ``None`` is treated as no history.
        system_message: Text sent as the leading ``system`` turn.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant reply as a plain string, with the prompt and the
        chat-template markers removed.
    """
    # Build full chat messages for the chat template
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # UI sliders may deliver numbers as floats; cast explicitly so the
    # generation arguments always have the numeric types they are meant to.
    out = pipe(
        prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
    )[0]["generated_text"]

    # Keep only the newly generated assistant text. The generated text is
    # expected to start with the prompt, so strip that prefix when present.
    # Otherwise split on the LAST assistant marker: splitting on the first one
    # would return earlier assistant turns from the history as well.
    assistant_marker = "<|im_start|>assistant"
    if out.startswith(prompt):
        out = out[len(prompt):]
    elif assistant_marker in out:
        out = out.rsplit(assistant_marker, 1)[-1]
    out = out.replace("<|im_end|>", "").strip()

    return out
53
 
54
 
 
57
  type="messages",
58
  additional_inputs=[
59
  gr.Textbox(
60
+ value="Give short answers with brief logical reasoning.",
61
  label="System message",
62
  ),
63
+ gr.Slider(1, 512, value=256, step=1, label="Max new tokens"),
64
+ gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
65
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
66
  ],
 
 
67
  )
68
 
69
  if __name__ == "__main__":