import os
import json
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset
# Set the Hugging Face cache directory
os.environ['HF_HOME'] = '/tmp/huggingface_cache'

# Read the Hugging Face token
hf_token = os.getenv('HF_TOKEN')  # make sure the HF_TOKEN environment variable is set
if hf_token:
    from huggingface_hub import HfFolder
    HfFolder.save_token(hf_token)
else:
    raise ValueError("Hugging Face token is not set")
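# Note: huggingface_hub.login(token=hf_token) is the more current way to do
# this; HfFolder.save_token still works but is considered legacy.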
# Use a single device everywhere
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Teacher/student pair from the same model family, so the vocabularies match
teacher_model_name = "Qwen/Qwen1.5-7B-Chat"
student_model_name = "Qwen/Qwen1.5-1.8B-Chat"  # compatible Qwen1.5 1.8B student
# Load the teacher model
teacher = AutoModelForCausalLM.from_pretrained(
    teacher_model_name,
    trust_remote_code=True,
    token=hf_token
).to(device)
teacher.eval()

# Load the student model
student = AutoModelForCausalLM.from_pretrained(
    student_model_name,
    trust_remote_code=True,
    token=hf_token
).to(device)

tokenizer = AutoTokenizer.from_pretrained(
    student_model_name,  # use the Qwen vocabulary to avoid dimension mismatches
    trust_remote_code=True,
    token=hf_token
)

# Handle the missing pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Load the training data
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Validate the data format
if not isinstance(data, list):
    raise ValueError("data.json is malformed: it must be a JSON list!")
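# Each record is expected to carry "instruction" and "output" fields (the keys
# accessed below). Illustrative shape only, not real data:
# [
#   {"instruction": "How do I bind my account?", "output": "Open Settings > Account ..."}
# ]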
# Format an example with Qwen's ChatML chat template. The system prompt is kept
# in Chinese (it reads: "You are the intelligent customer-service assistant for
# the 粉丝通 app.") because the bot serves Chinese-speaking users.
def format_chat(example):
    instruction = example["instruction"]
    output = example["output"]
    return f"<|im_start|>system\n你是一个粉丝通软件的智能客服助手。\n<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{output}<|im_end|>"
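# Note: Qwen1.5 tokenizers ship a chat template, so the same string could be
# built without hand-writing ChatML tags. A minimal sketch (not used below):
#
#   messages = [
#       {"role": "system", "content": "你是一个粉丝通软件的智能客服助手。"},
#       {"role": "user", "content": example["instruction"]},
#       {"role": "assistant", "content": example["output"]},
#   ]
#   text = tokenizer.apply_chat_template(messages, tokenize=False)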
# Preprocessing: tokenize the instruction as the model input and the expected
# output as the labels. The tokenizer accepts lists of strings, so this also
# works with dataset.map(..., batched=True).
def preprocess_data(example):
    inputs = tokenizer(example["instruction"], truncation=True, padding="max_length", max_length=128)
    labels = tokenizer(example["output"], truncation=True, padding="max_length", max_length=128)
    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "labels": labels["input_ids"]
    }
# Alternative: teacher-force on the full chat-formatted string instead. Note
# that format_chat handles one example at a time, so this version would need
# dataset.map(..., batched=False).
# def preprocess_data(example):
#     formatted_text = format_chat(example)
#     tokens = tokenizer(formatted_text, truncation=True, padding="max_length", max_length=128)
#     return {
#         "input_ids": tokens["input_ids"],
#         "attention_mask": tokens["attention_mask"],
#         "labels": tokens["input_ids"]
#     }
dataset = Dataset.from_list(data)
dataset = dataset.map(preprocess_data, batched=True)
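# Optional sanity check: map() keeps the original string columns by default,
# so the dataset should now expose both the raw and tokenized fields.
# print(dataset.column_names)  # e.g. instruction, output, input_ids, attention_mask, labels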
# Custom trainer that blends a KL distillation term with cross-entropy
class DistillationTrainer(Trainer):
    def __init__(self, teacher, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher = teacher.to(device)

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs["labels"]  # tokenized target answers from preprocess_data
        # Student forward pass
        outputs_student = model(**inputs)
        logits_student = outputs_student.logits
        # Teacher forward pass: no gradients, and only the tensor inputs it needs
        with torch.no_grad():
            outputs_teacher = self.teacher(
                input_ids=inputs["input_ids"].to(device),
                attention_mask=inputs["attention_mask"].to(device)
            )
            logits_teacher = outputs_teacher.logits
        temperature = 2.0
        # Soften both distributions with the temperature
        student_log_probs = torch.nn.functional.log_softmax(logits_student / temperature, dim=-1)
        teacher_probs = torch.nn.functional.softmax(logits_teacher / temperature, dim=-1)
        # Guard against a vocabulary-size mismatch between teacher and student
        if student_log_probs.shape != teacher_probs.shape:
            min_dim = min(student_log_probs.shape[-1], teacher_probs.shape[-1])
            student_log_probs = student_log_probs[..., :min_dim]
            teacher_probs = teacher_probs[..., :min_dim]
        # KL divergence, scaled by T^2 as in standard distillation
        kl_loss = torch.nn.functional.kl_div(student_log_probs, teacher_probs, reduction="batchmean") * (temperature ** 2)
        # Cross-entropy against the hard labels (pad positions are not masked here)
        ce_loss_fct = torch.nn.CrossEntropyLoss()
        ce_loss = ce_loss_fct(logits_student.view(-1, logits_student.size(-1)), labels.view(-1))
        # Blend the two losses
        alpha = 0.5
        loss = alpha * ce_loss + (1 - alpha) * kl_loss
        return (loss, outputs_student) if return_outputs else loss
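    # The blended objective follows standard knowledge distillation
    # (Hinton et al., 2015):
    #   L = alpha * CE(student, labels) + (1 - alpha) * T^2 * KL(teacher_T || student_T)
    # where the T^2 factor keeps the soft-target gradients comparable in
    # magnitude across temperatures.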
    def training_step(self, model, inputs, *args, **kwargs):
        # The base Trainer already moves inputs to the right device in
        # _prepare_inputs and runs the backward pass, so delegate to it;
        # a custom override that skips backward() would never update weights.
        return super().training_step(model, inputs, *args, **kwargs)
# Training arguments (use_cache is handled on the model config below instead)
training_args = TrainingArguments(
    output_dir="/tmp/distilled_model",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    logging_steps=100,
    save_strategy="epoch",
    remove_unused_columns=False,
    gradient_checkpointing=True,
    fp16=False,  # disable fp16 to avoid the _scale=None error
    bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported()  # enable bf16 only where the GPU supports it
)
# Disable use_cache on the model config itself: it is incompatible with
# gradient checkpointing and is not a TrainingArguments option
student.config.use_cache = False
# Initialize the trainer
trainer = DistillationTrainer(
    teacher=teacher,
    model=student,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset  # evaluating on the training set only measures fit, not generalization
)

# Start training
trainer.train()
# Push the distilled model and tokenizer to the Hub
student.push_to_hub("Snow2222/fst-nnn", token=hf_token)
tokenizer.push_to_hub("Snow2222/fst-nnn", token=hf_token)
# Deploy the Gradio web UI
print("🎉 Training finished, starting the Gradio web UI...")

# Model ID on the Hub
model_id = "Snow2222/fst-nnn"

# Pick a device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the freshly pushed model
print("🚀 Loading the model...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    trust_remote_code=True
).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
print("✅ Model loaded!")
# Gradio inference function
def chat_response(prompt):
    chat_input = f"<|im_start|>system\n你是一个粉丝通软件的智能客服助手。\n<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(chat_input, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # cap the generated length; max_length would count the prompt too
        do_sample=True,
        temperature=0.7
    )
    # Decode only the newly generated tokens so the reply does not echo the prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return result
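# Optional smoke test before exposing the UI (hypothetical question, adjust to
# your data):
# print(chat_response("怎么绑定账号?"))  # "How do I bind my account?"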
# Build the Gradio interface (UI strings stay in Chinese for the target users)
iface = gr.Interface(
    fn=chat_response,
    inputs=gr.Textbox(lines=2, placeholder="请输入你的问题..."),
    outputs="text",
    title="粉丝通 AI 客服",
    description="基于 Snow2222/fst-nnn 训练的 AI 模型,自动回答你的问题。",
    allow_flagging="never"
)

# Launch Gradio, binding to all interfaces on the Space's standard port
iface.launch(server_name="0.0.0.0", server_port=7860)