# Source: deeptest / app_lora.py
# Last commit by Snow2222: "Rename app.py to app_lora.py" (1ebad56, verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Hub ID of the base LLM that the LoRA adapter was trained on top of.
BASE_MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Hub ID of the LoRA adapter applied to the base model.
ADAPTER_MODEL_ID = "Snow2222/autotrain-fst"

print("🚀 正在加载 Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

print("🚀 正在加载 Base Model(基础模型)...")
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Half precision on GPU, full precision on CPU.
_base_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype=_base_dtype,
)

# Work around the `size mismatch` error raised when loading the adapter:
# resize the base model's token embeddings to the vocabulary size the
# adapter checkpoint was saved with.
print("🔧 调整 vocab_size 以匹配 LoRA...")
new_vocab_size = 151665
base_model.resize_token_embeddings(new_vocab_size)

print("🚀 正在加载 LoRA 适配器...")
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
model = model.to(device)
# ======== Amplify the influence of the LoRA weights ========
# 1) Resolve the adapter name (falls back to "default" when none is reported).
adapter_name = model.active_adapter or "default"

# 2) Override lora_alpha in the adapter's config.
peft_config = model.peft_config[adapter_name]
print(f"【原始】lora_alpha: {peft_config.lora_alpha}")
peft_config.lora_alpha = 128  # larger value to experiment with
print(f"【更新后】lora_alpha: {peft_config.lora_alpha}")

# 3) Propagate the new alpha into every LoRA layer.
#    PEFT stores the *effective* multiplier in `scaling[adapter]`, defined as
#    lora_alpha / r (or lora_alpha / sqrt(r) when rank-stabilised LoRA is on).
#    Writing the raw alpha there would over-amplify the adapter by a factor
#    of r, so the value is recomputed from alpha and the layer's rank.
_use_rslora = getattr(peft_config, "use_rslora", False)
for module in model.modules():
    scaling = getattr(module, "scaling", None)
    # Only LoRA layers carry a per-adapter `scaling` dict.
    if not isinstance(scaling, dict) or adapter_name not in scaling:
        continue
    # Prefer the layer's own rank (it may differ per layer); fall back to the
    # rank recorded in the adapter config.
    layer_ranks = getattr(module, "r", None)
    if isinstance(layer_ranks, dict) and adapter_name in layer_ranks:
        rank = layer_ranks[adapter_name]
    else:
        rank = peft_config.r
    if not rank:
        # A rank of 0 means no LoRA weights in this layer; nothing to scale.
        continue
    divisor = rank ** 0.5 if _use_rslora else rank
    scaling[adapter_name] = peft_config.lora_alpha / divisor
# ======== End of LoRA amplification ========
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply to a single user message.

    Args:
        message: The user's current input text.
        history: Previous chat turns supplied by Gradio. Not used: only the
            current message is sent to the model.
        system_message: Instruction text placed at the start of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back).
    """
    print("==== 🚀 处理用户输入 ====")
    print(f"用户输入: {message}")

    # Build a simple prompt: system message, user turn, then the assistant cue.
    prompt = f"{system_message}\n用户: {message}\n助手:"
    print(f"📡 处理 Prompt: {prompt}")

    # Tokenize the full prompt (not just the raw message) so that the system
    # message actually reaches the model.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            # Sliders may deliver floats; generate() needs an integer count.
            max_new_tokens=int(max_tokens),
            # Enable sampling explicitly so temperature/top_p take effect
            # regardless of the checkpoint's default generation config.
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )

    # generate() returns prompt + continuation; keep only the continuation so
    # the chat window does not repeat the prompt.
    new_tokens = output[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print(f"✅ 生成结果: {response}")
    return response
# Gradio UI: a chat interface backed by `respond`, with extra controls whose
# values are passed to `respond` after (message, history), in this order.
_system_box = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
_max_tokens_slider = gr.Slider(
    minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
)
_top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
)
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)
# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    print("🌍 启动 Gradio 界面...")
    demo.launch()