Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -64,10 +64,11 @@ def generate_responses(model, tokenizer, prompts):
|
|
| 64 |
messages = [[{"role": "user", "content": message}] for message in prompts]
|
| 65 |
|
| 66 |
texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 67 |
-
print(texts)
|
|
|
|
| 68 |
model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
|
| 69 |
|
| 70 |
-
print(tokenizer.batch_decode(model_inputs["input_ids"]))
|
| 71 |
|
| 72 |
with torch.inference_mode():
|
| 73 |
generated_ids = model.generate(
|
|
@@ -79,11 +80,11 @@ def generate_responses(model, tokenizer, prompts):
|
|
| 79 |
)
|
| 80 |
prompt_lengths = model_inputs["attention_mask"].sum(dim=1) - 1
|
| 81 |
generated_ids = [output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)]
|
|
|
|
| 82 |
responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
| 83 |
|
| 84 |
return responses
|
| 85 |
|
| 86 |
-
|
| 87 |
def classify_pairs(model, tokenizer, prompts, responses):
|
| 88 |
texts = [prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)]
|
| 89 |
|
|
|
|
| 64 |
messages = [[{"role": "user", "content": message}] for message in prompts]
|
| 65 |
|
| 66 |
texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 67 |
+
print(texts[0])
|
| 68 |
+
|
| 69 |
model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
|
| 70 |
|
| 71 |
+
print(tokenizer.batch_decode(model_inputs["input_ids"][0]))
|
| 72 |
|
| 73 |
with torch.inference_mode():
|
| 74 |
generated_ids = model.generate(
|
|
|
|
| 80 |
)
|
| 81 |
prompt_lengths = model_inputs["attention_mask"].sum(dim=1) - 1
|
| 82 |
generated_ids = [output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)]
|
| 83 |
+
print(tokenizer.batch_decode(generated_ids[0], skip_special_tokens=False))
|
| 84 |
responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
| 85 |
|
| 86 |
return responses
|
| 87 |
|
|
|
|
| 88 |
def classify_pairs(model, tokenizer, prompts, responses):
|
| 89 |
texts = [prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)]
|
| 90 |
|