Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
|
|
| 13 |
|
| 14 |
# Load the Lora model
|
| 15 |
model = PeftModel.from_pretrained(model, peft_model_id)
|
| 16 |
-
model = model.to('cuda:0')
|
| 17 |
|
| 18 |
|
| 19 |
def make_inference(product_name, product_description):
|
|
@@ -21,6 +21,8 @@ def make_inference(product_name, product_description):
|
|
| 21 |
f"### Product and Description:\n{product_name}: {product_description}\n\n### Ad:",
|
| 22 |
return_tensors="pt",
|
| 23 |
)
|
|
|
|
|
|
|
| 24 |
|
| 25 |
with torch.cuda.amp.autocast():
|
| 26 |
output_tokens = model.generate(**batch, max_new_tokens=50)
|
|
|
|
| 13 |
|
| 14 |
# Load the Lora model
|
| 15 |
model = PeftModel.from_pretrained(model, peft_model_id)
|
| 16 |
+
#model = model.to('cuda:0')
|
| 17 |
|
| 18 |
|
| 19 |
def make_inference(product_name, product_description):
|
|
|
|
| 21 |
f"### Product and Description:\n{product_name}: {product_description}\n\n### Ad:",
|
| 22 |
return_tensors="pt",
|
| 23 |
)
|
| 24 |
+
# Move the input tensors to the GPU
|
| 25 |
+
batch = {key: value.to('cuda:0') for key, value in batch.items()}
|
| 26 |
|
| 27 |
with torch.cuda.amp.autocast():
|
| 28 |
output_tokens = model.generate(**batch, max_new_tokens=50)
|