Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,10 +3,12 @@ import gradio as gr
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
import torch
|
| 5 |
|
|
|
|
|
|
|
| 6 |
# 設置模型 ID 和加載 Hugging Face API token
|
| 7 |
model_id = "dingckc/FineLlama-3.1-8B"
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv('ACCESS_KEY'))
|
| 9 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, token=os.getenv('ACCESS_KEY'))
|
| 10 |
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
|
| 11 |
|
| 12 |
# 定義推理函數
|
|
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
import torch
|
| 5 |
|
| 6 |
+
import bitsandbytes as bnb
|
| 7 |
+
|
| 8 |
# 設置模型 ID 和加載 Hugging Face API token
|
| 9 |
model_id = "dingckc/FineLlama-3.1-8B"
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv('ACCESS_KEY'))
|
| 11 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, token=os.getenv('ACCESS_KEY'))
|
| 12 |
# An 8-bit (bitsandbytes) model is already placed on its device while it is
# loaded, and transformers raises ValueError if .to() is called on it, so
# only move the model explicitly when it was not loaded in 8-bit.
if not getattr(model, "is_loaded_in_8bit", False):
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")
|
| 13 |
|
| 14 |
# 定義推理函數
|