basmala12 committed · Commit 126e7fa · verified · 1 Parent(s): 38a7826

Update app.py

Files changed (1):
  1. app.py +13 -21
app.py CHANGED
@@ -12,61 +12,53 @@ model.eval()
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """
-    ChatInterface (type="messages") passes:
-    - message: current user message (str)
-    - history: list of dicts: [{"role": "...", "content": "..."}, ...]
-    - system_message, max_tokens, temperature, top_p: from additional_inputs
-    We return a single string: the assistant reply.
+    SAFER / MORE FACTUAL VERSION (Option A)
+
+    - Deterministic decoding (no sampling)
+    - Uses chat template correctly
+    - Returns only the new assistant answer
     """
 
-    # Build full conversation for the chat template
+    # Build conversation for the chat template
     messages = [{"role": "system", "content": system_message}]
 
     # history is a list of {"role": "user"/"assistant", "content": str}
-    # We append it as-is to preserve previous turns
     messages.extend(history)
 
-    # Add the new user question
+    # Add current user message
     messages.append({"role": "user", "content": message})
 
-    # Turn into model prompt using the tokenizer's chat template
+    # Turn into prompt
     prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
     )
 
-    # Tokenize
     inputs = tokenizer(prompt, return_tensors="pt")
 
-    # Generate continuation (new assistant answer only)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
-            do_sample=True,
-            temperature=float(temperature),
-            top_p=float(top_p),
+            do_sample=False,   # <- deterministic, no randomness
+            temperature=0.0,   # <- ignored when do_sample=False, but explicit
         )
 
-    # Slice off the prompt tokens, keep only new tokens
+    # Keep only new tokens after the prompt
     generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
     answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
-    # Optional: enforce "short answer + brief reasoning"
-    words = answer.split()
-    if len(words) > 60:
-        answer = " ".join(words[:60]) + " ..."
-
     return answer
 
 
+
 chatbot = gr.ChatInterface(
     fn=respond,
     type="messages",
     additional_inputs=[
         gr.Textbox(
-            value="Give short answers with brief logical reasoning.",
+            value="Give short, factual answers with brief logical reasoning. If you are not sure, say you are not sure instead of guessing.",
             label="System message",
         ),
         gr.Slider(1, 512, value=256, step=1, label="Max new tokens"),
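
For anyone reproducing the change locally: the hunk header's context (`model.eval()`) shows that app.py already defines `tokenizer` and `model` and imports gradio and torch above line 12. A minimal sketch of that assumed preamble follows; the model ID is a placeholder, since the commit does not show it:

# Assumed preamble above the hunk (not part of this commit).
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-org/your-model"  # placeholder; the real checkpoint is set earlier in app.py

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.eval()  # the context line visible in the hunk header

# ... respond() and chatbot = gr.ChatInterface(...) as in the diff above ...

chatbot.launch()  # presumably called at the bottom of app.py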
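
Since the commit deletes the docstring that explained the message format, it is worth noting what `respond` still assumes: with `type="messages"`, Gradio passes `history` as OpenAI-style role/content dicts, so after the `extend` and `append` steps the list handed to the chat template looks like this (contents illustrative):

# Illustrative shape of `messages` right before apply_chat_template:
messages = [
    {"role": "system", "content": "Give short, factual answers ..."},
    {"role": "user", "content": "an earlier question"},     # from history
    {"role": "assistant", "content": "an earlier answer"},  # from history
    {"role": "user", "content": "the current message"},     # appended last
]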
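
On the new generate call: the inline comment is right that `temperature` is ignored when `do_sample=False`, but some transformers releases emit a warning when sampling flags are set in greedy mode. If that warning shows up in the Space logs, an equivalent call simply drops the flag:

# Same greedy decoding, minus the unused sampling flag
# (identical output; only avoids a possible transformers warning).
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=False,
    )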
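
One caveat the commit does not touch: `tokenizer(prompt, return_tensors="pt")` leaves the tensors on the CPU. That is fine for a CPU-only Space, but if `model` were ever loaded on a GPU, the inputs would need to be moved to the same device first, e.g.:

# Keep inputs on the model's device (a no-op on CPU-only setups):
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)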