Spaces:

Snow2222
/

SSS-Distillation

Runtime error

App Files Community

Snow2222 committed on Feb 8, 2025

Commit

0b272a1

verified ·

1 Parent(s): d6c06f6

Update train.py

Browse files

Files changed (1) hide show

train.py +5 -13

train.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
 import json
-from transformers import AutoModelForCausalLM,AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer, BitsAndBytesConfig
 from datasets import Dataset
 from huggingface_hub import HfApi, HfFolder
 from peft import LoraConfig, get_peft_model
 os.environ['HF_HOME'] = '/tmp/huggingface_cache'
 # 从环境变量加载 Hugging Face token
 hf_token = os.getenv('HF_TOKEN')  # 假设你将 token 设置为环境变量
 if hf_token:
@@ -15,24 +14,19 @@ if hf_token:
 else:
     raise ValueError("Hugging Face token 未设置")
-# ✅ 设置量化配置，避免 fp8 量化错误
-quantization_config = BitsAndBytesConfig(load_in_4bit=True)  # 使用 4-bit 量化，避免 fp8 错误
-# 加载基础模型（例如：DeepSeek-R1）
-model_name = "deepseek-ai/DeepSeek-R1"  # 你可以根据需要调整基础模型
-# ✅ 允许远程执行自定义代码，并启用 4-bit 量化
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
-    token=hf_token,  # 旧版 `use_auth_token` 被弃用
-    quantization_config=quantization_config  # 指定 4-bit 量化
 )
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True,
     token=hf_token
 )
 # 读取数据文件
 with open('data.json', 'r', encoding='utf-8') as f:
     data = json.load(f)
@@ -80,5 +74,3 @@ trainer.train()
 # 保存模型
 model.save_pretrained("./fst-nnn")
 tokenizer.save_pretrained("./fst-nnn")
-# 上传到 Hugging Face

 import os
 import json
+from transformers import AutoModelForCausalLM, Trainer, TrainingArguments, AutoTokenizer
 from datasets import Dataset
 from huggingface_hub import HfApi, HfFolder
 from peft import LoraConfig, get_peft_model
 os.environ['HF_HOME'] = '/tmp/huggingface_cache'
 # 从环境变量加载 Hugging Face token
 hf_token = os.getenv('HF_TOKEN')  # 假设你将 token 设置为环境变量
 if hf_token:
 else:
     raise ValueError("Hugging Face token 未设置")
+# 加载基础模型（DeepSeek-R1）
+model_name = "deepseek-ai/DeepSeek-R1"
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
+    token=hf_token  # 旧版 `use_auth_token` 被弃用
 )
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True,
     token=hf_token
 )
 # 读取数据文件
 with open('data.json', 'r', encoding='utf-8') as f:
     data = json.load(f)
 # 保存模型
 model.save_pretrained("./fst-nnn")
 tokenizer.save_pretrained("./fst-nnn")