Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,12 +15,10 @@ load_in_4bit = True # Set to True if you want to use 4-bit quantization
|
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
| 16 |
|
| 17 |
# Load the base model with adapters
|
| 18 |
-
model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True)
|
| 19 |
model.load_adapter(lora_adapter)
|
| 20 |
|
| 21 |
-
|
| 22 |
-
device = torch.device("cpu")
|
| 23 |
-
model.to(device)
|
| 24 |
|
| 25 |
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
| 26 |
# Combine system message and chat history
|
|
|
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
| 16 |
|
| 17 |
# Load the base model with adapters
|
| 18 |
+
model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True).to("cuda")
|
| 19 |
model.load_adapter(lora_adapter)
|
| 20 |
|
| 21 |
+
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
| 24 |
# Combine system message and chat history
|