Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
| 2 |
import torch
|
| 3 |
import os
|
| 4 |
import spaces
|
| 5 |
|
| 6 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
-
from peft import PeftModel
|
| 8 |
-
|
| 9 |
-
|
| 10 |
# Get the Hugging Face access token
|
| 11 |
hf_token = os.getenv("HF_API_TOKEN")
|
| 12 |
|
|
@@ -39,11 +37,15 @@ def generate_prompt(instruction, input_text=""):
|
|
| 39 |
return prompt
|
| 40 |
|
| 41 |
# Define the response-generation function and decorate it with @spaces.GPU
|
| 42 |
-
@spaces.GPU
|
| 43 |
def generate_response(instruction, input_text):
|
| 44 |
global model
|
| 45 |
|
| 46 |
if model is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# Import the libraries that require a GPU inside the function
|
| 48 |
import bitsandbytes
|
| 49 |
from transformers import AutoModelForCausalLM
|
|
@@ -54,7 +56,7 @@ def generate_response(instruction, input_text):
|
|
| 54 |
device_map="auto",
|
| 55 |
torch_dtype=torch.float16,
|
| 56 |
use_auth_token=hf_token,
|
| 57 |
-
trust_remote_code=True
|
| 58 |
)
|
| 59 |
|
| 60 |
# Load the adapter and apply it to the base model
|
|
@@ -64,6 +66,7 @@ def generate_response(instruction, input_text):
|
|
| 64 |
torch_dtype=torch.float16,
|
| 65 |
use_auth_token=hf_token
|
| 66 |
)
|
|
|
|
| 67 |
# Set pad_token
|
| 68 |
tokenizer.pad_token = tokenizer.eos_token
|
| 69 |
model.config.pad_token_id = tokenizer.pad_token_id
|
|
@@ -101,6 +104,5 @@ iface = gr.Interface(
|
|
| 101 |
allow_flagging="never"
|
| 102 |
)
|
| 103 |
|
| 104 |
-
|
| 105 |
# Launch the Gradio interface
|
| 106 |
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer
|
| 3 |
+
from peft import PeftModel
|
| 4 |
import torch
|
| 5 |
import os
|
| 6 |
import spaces
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# Get the Hugging Face access token
|
| 9 |
hf_token = os.getenv("HF_API_TOKEN")
|
| 10 |
|
|
|
|
| 37 |
return prompt
|
| 38 |
|
| 39 |
# Define the response-generation function and decorate it with @spaces.GPU
|
| 40 |
+
@spaces.GPU(duration=120) # 如果需要,可以调整 duration 参数
|
| 41 |
def generate_response(instruction, input_text):
|
| 42 |
global model
|
| 43 |
|
| 44 |
if model is None:
|
| 45 |
+
# Install the GPU build of bitsandbytes inside the function
|
| 46 |
+
import subprocess
|
| 47 |
+
subprocess.call(["pip", "install", "bitsandbytes==0.41.1"])
|
| 48 |
+
|
| 49 |
# Import the libraries that require a GPU inside the function
|
| 50 |
import bitsandbytes
|
| 51 |
from transformers import AutoModelForCausalLM
|
|
|
|
| 56 |
device_map="auto",
|
| 57 |
torch_dtype=torch.float16,
|
| 58 |
use_auth_token=hf_token,
|
| 59 |
+
trust_remote_code=True # 如果你的模型使用自定义代码,请保留此参数
|
| 60 |
)
|
| 61 |
|
| 62 |
# Load the adapter and apply it to the base model
|
|
|
|
| 66 |
torch_dtype=torch.float16,
|
| 67 |
use_auth_token=hf_token
|
| 68 |
)
|
| 69 |
+
|
| 70 |
# Set pad_token
|
| 71 |
tokenizer.pad_token = tokenizer.eos_token
|
| 72 |
model.config.pad_token_id = tokenizer.pad_token_id
|
|
|
|
| 104 |
allow_flagging="never"
|
| 105 |
)
|
| 106 |
|
|
|
|
| 107 |
# Launch the Gradio interface
|
| 108 |
iface.launch()
|