Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,14 +22,14 @@ def say_hello(msg: Annotated[str, Form()]):
|
|
| 22 |
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
|
| 23 |
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
| 24 |
|
| 25 |
-
messages = [{"role": "user", "content":
|
| 26 |
input_text=tokenizer.apply_chat_template(messages, tokenize=False)
|
| 27 |
print(input_text)
|
| 28 |
|
| 29 |
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
| 30 |
print("output")
|
| 31 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
| 32 |
-
outputs = model.generate(inputs, max_new_tokens=
|
| 33 |
|
| 34 |
print("complete")
|
| 35 |
return {"message": tokenizer.decode(outputs[0])}
|
|
|
|
| 22 |
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
|
| 23 |
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
| 24 |
|
| 25 |
+
messages = [{"role": "user", "content": msg}]
|
| 26 |
input_text=tokenizer.apply_chat_template(messages, tokenize=False)
|
| 27 |
print(input_text)
|
| 28 |
|
| 29 |
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
| 30 |
print("output")
|
| 31 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
| 32 |
+
outputs = model.generate(inputs, max_new_tokens=256, temperature=0.6, top_p=0.92, do_sample=True)
|
| 33 |
|
| 34 |
print("complete")
|
| 35 |
return {"message": tokenizer.decode(outputs[0])}
|