basmala12 committed
Commit 27ed1d2 · verified · 1 parent: 126e7fa

Update app.py

Files changed (1):
  1. app.py +31 -16
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 MODEL_NAME = "basmala12/smollm_finetuning5"
 
-# Load tokenizer & model once at startup (on CPU)
+# Load tokenizer & model once at startup
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 model.eval()
@@ -12,14 +12,14 @@ model.eval()
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """
-    SAFER / MORE FACTUAL VERSION (Option A)
-
-    - Deterministic decoding (no sampling)
-    - Uses chat template correctly
-    - Returns only the new assistant answer
+    Safer, generic factual mode:
+    - uses chat template properly
+    - deterministic decoding (no sampling)
+    - generic conciseness filter (1–2 sentences, word cap)
+    - NO hardcoded answers for specific questions
     """
 
-    # Build conversation for the chat template
+    # Build conversation for chat template
     messages = [{"role": "system", "content": system_message}]
 
     # history is a list of {"role": "user"/"assistant", "content": str}
@@ -28,7 +28,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     # Add current user message
     messages.append({"role": "user", "content": message})
 
-    # Turn into prompt
+    # Apply chat template
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
@@ -37,20 +37,32 @@
 
     inputs = tokenizer(prompt, return_tensors="pt")
 
+    # Deterministic generation: safer, less hallucination than sampling
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
-            do_sample=False,  # <- deterministic, no randomness
-            temperature=0.0,  # <- ignored when do_sample=False, but explicit
+            do_sample=False,  # no randomness
+            temperature=0.0,  # ignored when do_sample=False, but explicit
         )
 
-    # Keep only new tokens after the prompt
+    # Take only the newly generated tokens (after the prompt)
     generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
     answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
-    return answer
+    # ---------- Generic conciseness: first 1–2 sentences, word cap ----------
+    import re
+
+    # Keep only first 1–2 sentences
+    sentences = re.split(r'(?<=[.!?])\s+', answer)
+    answer = " ".join(sentences[:2])
 
+    # Word cap (e.g. ~40 words)
+    words = answer.split()
+    if len(words) > 40:
+        answer = " ".join(words[:40]) + "."
+
+    return answer
 
 
 chatbot = gr.ChatInterface(
@@ -58,15 +70,18 @@ chatbot = gr.ChatInterface(
     type="messages",
     additional_inputs=[
         gr.Textbox(
-            value="Give short, factual answers with brief logical reasoning. If you are not sure, say you are not sure instead of guessing.",
+            value=(
+                "Give short, factual answers with brief logical reasoning. "
+                "If you are not sure, say you are not sure instead of guessing."
+            ),
             label="System message",
         ),
         gr.Slider(1, 512, value=256, step=1, label="Max new tokens"),
-        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
+        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature (ignored in deterministic mode)"),
+        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (ignored in deterministic mode)"),
     ],
     title="SmolLM2 – Short Reasoning Chatbot",
-    description="Fine-tuned SmolLM2 (basmala12/smollm_finetuning5) that gives short answers with brief logical reasoning.",
+    description="Fine-tuned SmolLM2 (basmala12/smollm_finetuning5) that gives short, factual answers with brief reasoning.",
 )
 
 if __name__ == "__main__":
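
The conciseness filter this commit adds is self-contained, so it can be exercised without loading the model or Gradio. Below is a minimal standalone sketch of the same logic; the name shorten_answer and its parameters are illustrative, not part of app.py, which inlines the logic with the constants 2 and 40.

import re

def shorten_answer(answer, max_sentences=2, max_words=40):
    """Standalone sketch of the commit's conciseness filter (names illustrative)."""
    # Split on whitespace that follows sentence-ending punctuation.
    sentences = re.split(r'(?<=[.!?])\s+', answer)
    answer = " ".join(sentences[:max_sentences])

    # Hard word cap as a fallback for long run-on sentences.
    words = answer.split()
    if len(words) > max_words:
        answer = " ".join(words[:max_words]) + "."
    return answer

print(shorten_answer("Paris is the capital of France. It sits on the Seine. It was founded long ago."))
# -> "Paris is the capital of France. It sits on the Seine."

Splitting on (?<=[.!?])\s+ uses a lookbehind, so the punctuation stays attached to each sentence and rejoining the first two slices reproduces them verbatim.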
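Because do_sample=False forces greedy decoding, the Temperature and Top-p sliders no longer affect output, which the updated labels now state. respond() can also be smoke-tested directly; a sketch assuming app.py's globals are loaded (the question and settings below are placeholders, and the first run downloads basmala12/smollm_finetuning5):

# Quick local smoke test for respond(), run from app.py's module scope.
reply = respond(
    message="What is the capital of France?",  # placeholder question
    history=[],                 # no prior turns
    system_message="Give short, factual answers.",
    max_tokens=64,
    temperature=0.7,            # ignored: decoding is deterministic
    top_p=0.9,                  # ignored: decoding is deterministic
)
print(reply)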