TobDeBer committed
Commit 56bfe89 · 1 Parent(s): d9a2223

llm safety

Files changed (1): app.py (+6 -2)
app.py CHANGED
@@ -22,11 +22,14 @@ def load_model():
     global tokenizer, model
     try:
         print(f"Loading model: {MODEL_NAME}")
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+        tokenizer.padding_side = "left"  # Ensure consistency
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             dtype=torch.float32,
-            device_map="auto"
+            device_map={"": "cpu"}
         )
 
         if tokenizer.pad_token is None:
@@ -62,6 +65,7 @@ def chat_predict(message, history, max_length, temperature, top_p, repetition_pe
 
     # Format the prompt
     formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    print("formatted_prompt: ", formatted_prompt)
     inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
 
     # Setup streamer
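For context, a minimal sketch of what load_model() looks like after this commit, reconstructed from the first hunk. The MODEL_NAME value, the imports, and the except branch are assumptions, since the diff shows only part of the function.

# Sketch of load_model() after this commit, reconstructed from the hunk above.
# MODEL_NAME's value and the except branch are assumptions, not from the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "example/model-name"  # placeholder; the real name is not shown in the diff
tokenizer = None
model = None

def load_model():
    global tokenizer, model
    try:
        print(f"Loading model: {MODEL_NAME}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
        tokenizer.padding_side = "left"  # Ensure consistency
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Pin every module to CPU rather than letting accelerate choose devices.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            dtype=torch.float32,
            device_map={"": "cpu"}
        )
    except Exception as e:  # assumed; the diff does not show the except branch
        print(f"Failed to load model: {e}")
        raise

Pinning the model with device_map={"": "cpu"} is more predictable on a CPU-only host than device_map="auto", which lets accelerate pick placement. Note that the pre-existing pad-token check further down in the function becomes a no-op, since the new code already sets the pad token before the model loads.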
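The second hunk stops right before the "# Setup streamer" comment, and the actual continuation is not shown in the diff. The following is a minimal sketch of the usual TextIteratorStreamer pattern, assuming the parameter names from chat_predict's signature; stream_reply is a hypothetical helper name.

# Hypothetical sketch of the streaming step that follows the second hunk;
# the real continuation of chat_predict is not shown in the diff.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, inputs, max_length, temperature, top_p, repetition_penalty):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.pad_token_id,
    )
    # generate() blocks, so it runs in a background thread while the
    # streamer yields decoded text chunks on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()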