MindLabUnimib committed on
Commit
519b2ce
·
verified ·
1 Parent(s): bbd23ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -65,11 +65,11 @@ def generate_responses(model, tokenizer, prompts):
65
 
66
  texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
67
  print(texts[0])
68
-
69
- model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
70
-
71
- print(tokenizer.batch_decode(model_inputs["input_ids"][0]))
72
 
 
 
 
 
73
  with torch.inference_mode():
74
  generated_ids = model.generate(
75
  **model_inputs,
@@ -78,9 +78,9 @@ def generate_responses(model, tokenizer, prompts):
78
  repetition_penalty=1.1,
79
  max_new_tokens=512,
80
  )
81
- prompt_lengths = model_inputs["attention_mask"].sum(dim=1) - 1
82
  generated_ids = [output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)]
83
- print(tokenizer.batch_decode(generated_ids[0], skip_special_tokens=False))
84
  responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
85
 
86
  return responses
 
65
 
66
  texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
67
  print(texts[0])
 
 
 
 
68
 
69
+ model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
70
+
71
+ print(tokenizer.decode(model_inputs["input_ids"][0]))
72
+
73
  with torch.inference_mode():
74
  generated_ids = model.generate(
75
  **model_inputs,
 
78
  repetition_penalty=1.1,
79
  max_new_tokens=512,
80
  )
81
+ prompt_lengths = model_inputs["attention_mask"].sum(dim=1)
82
  generated_ids = [output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)]
83
+ print(tokenizer.decode(generated_ids[0], skip_special_tokens=False))
84
  responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
85
 
86
  return responses