Spaces:

Snow2222
/

deeptest

No application file

App Files Files Community

deeptest / app_lora.py

Snow2222

Rename app.py to app_lora.py

1ebad56 verified over 1 year ago

raw

history blame contribute delete

3.24 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel

	# Hub IDs: the base LLM, and the LoRA adapter that was trained on top of it.
	BASE_MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
	ADAPTER_MODEL_ID = "Snow2222/autotrain-fst"

	print("🚀 正在加载 Tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

	print("🚀 正在加载 Base Model（基础模型）...")
	# Half precision when CUDA is available, full precision on CPU.
	if torch.cuda.is_available():
	    device, load_dtype = "cuda", torch.float16
	else:
	    device, load_dtype = "cpu", torch.float32
	base_model = AutoModelForCausalLM.from_pretrained(
	    BASE_MODEL_ID,
	    torch_dtype=load_dtype,
	    device_map="auto",
	)

	# Work around the `size mismatch` error: make the base model's vocabulary
	# size agree with the vocabulary size the LoRA adapter expects.
	print("🔧 调整 vocab_size 以匹配 LoRA...")
	new_vocab_size = 151665
	base_model.resize_token_embeddings(new_vocab_size)

	print("🚀 正在加载 LoRA 适配器...")
	model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
	model = model.to(device)

	# ======== Amplify the influence of the LoRA weights ========
	# PEFT multiplies each adapter update by a per-layer factor
	#   scaling = lora_alpha / r        (lora_alpha / sqrt(r) when rsLoRA is on)
	# so raising lora_alpha must be propagated through that formula. Writing the
	# raw alpha into `scaling` would over-amplify the adapter by a factor of r.

	# 1) Resolve the adapter name ("default" unless a custom name was used).
	adapter_name = model.active_adapter or "default"

	# 2) Record the new alpha in the adapter's config.
	peft_config = model.peft_config[adapter_name]
	print(f"【原始】lora_alpha: {peft_config.lora_alpha}")
	peft_config.lora_alpha = 128  # larger value to experiment with
	print(f"【更新后】lora_alpha: {peft_config.lora_alpha}")

	# 3) Walk the LoRA layers and recompute scaling[adapter_name] from the new
	#    alpha and each layer's own rank.
	use_rslora = getattr(peft_config, "use_rslora", False)
	for module_name, module in model.named_modules():
	    # LoRA layers expose `scaling` as a dict keyed by adapter name.
	    scaling = getattr(module, "scaling", None)
	    if not isinstance(scaling, dict) or adapter_name not in scaling:
	        continue

	    # The per-layer rank lives in `module.r`; without it the correct factor
	    # cannot be derived, so leave such a layer untouched.
	    ranks = getattr(module, "r", None)
	    rank = ranks.get(adapter_name) if isinstance(ranks, dict) else None
	    if not rank:
	        continue

	    # Keep the layer's own alpha record in sync with the config.
	    layer_alphas = getattr(module, "lora_alpha", None)
	    if isinstance(layer_alphas, dict) and adapter_name in layer_alphas:
	        layer_alphas[adapter_name] = peft_config.lora_alpha

	    denominator = rank ** 0.5 if use_rslora else rank
	    scaling[adapter_name] = peft_config.lora_alpha / denominator
	# ======== End of LoRA amplification ========

	def respond(message, history, system_message, max_tokens, temperature, top_p):
	    """Generate a single chat reply for the Gradio ChatInterface.

	    Args:
	        message: The user's current message.
	        history: Previous turns supplied by Gradio; not used, only the
	            current message is sent to the model.
	        system_message: Instruction text placed at the start of the prompt.
	        max_tokens: Upper bound on the number of newly generated tokens.
	        temperature: Sampling temperature.
	        top_p: Nucleus-sampling probability mass.

	    Returns:
	        The generated reply text, without the prompt echoed back.
	    """
	    print("==== 🚀 处理用户输入 ====")
	    print(f"用户输入: {message}")

	    # Build a simple prompt from the system message and the current input.
	    prompt = f"{system_message}\n用户: {message}\n助手:"
	    print(f"📡 处理 Prompt: {prompt}")

	    # Tokenize the full prompt (not just `message`) so the system message
	    # actually reaches the model.
	    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
	    prompt_length = inputs["input_ids"].shape[1]

	    with torch.no_grad():
	        output = model.generate(
	            **inputs,
	            # Cast to int in case the slider delivers a float.
	            max_new_tokens=int(max_tokens),
	            # temperature/top_p only take effect when sampling is enabled.
	            do_sample=True,
	            temperature=temperature,
	            top_p=top_p,
	        )

	    # generate() returns prompt + continuation; keep only the new tokens so
	    # the reply does not repeat the prompt.
	    new_tokens = output[0][prompt_length:]
	    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
	    print(f"✅ 生成结果: {response}")

	    return response

	# Gradio UI: extra controls whose values are passed to respond() after
	# (message, history), in the order listed.
	system_message_box = gr.Textbox(
	    value="You are a friendly Chatbot.",
	    label="System message",
	)
	max_tokens_slider = gr.Slider(
	    minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"
	)
	temperature_slider = gr.Slider(
	    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
	)
	top_p_slider = gr.Slider(
	    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
	)

	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=[
	        system_message_box,
	        max_tokens_slider,
	        temperature_slider,
	        top_p_slider,
	    ],
	)

	# Start the web UI only when this file is run as a script.
	if __name__ == "__main__":
	    print("🌍 启动 Gradio 界面...")
	    demo.launch()