bobpopboom committed on
Commit
2876cd4
·
verified ·
1 Parent(s): b7c5b78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -24,9 +24,10 @@ try:
24
  model=model,
25
  tokenizer=tokenizer,
26
  torch_dtype=torch.float16,
27
- max_length=128, # Limit response length
28
  num_return_sequences=1, # Only generate one response
29
- do_sample=False # Use greedy decoding for speed
 
 
30
  )
31
 
32
  except Exception as e:
@@ -55,6 +56,8 @@ def respond(
55
  temperature=temperature,
56
  top_p=top_p,
57
  )[0]["generated_text"]
 
 
58
  #Extract the bot's reply (adjust if your model format is different)
59
  bot_response = response.split("Assistant:")[-1].strip()
60
  yield bot_response
 
24
  model=model,
25
  tokenizer=tokenizer,
26
  torch_dtype=torch.float16,
 
27
  num_return_sequences=1, # Only generate one response
28
+ do_sample=True, # Enable sampling since we're using temperature and top_p
29
+ truncation=True, # Explicitly enable truncation
30
+ max_new_tokens=128 # Use only max_new_tokens
31
  )
32
 
33
  except Exception as e:
 
56
  temperature=temperature,
57
  top_p=top_p,
58
  )[0]["generated_text"]
59
+ prompt,
60
+ do_sample=True,
61
  #Extract the bot's reply (adjust if your model format is different)
62
  bot_response = response.split("Assistant:")[-1].strip()
63
  yield bot_response