Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
-
from transformers import AutoModelForCausalLM,
|
| 4 |
from datasets import Dataset
|
| 5 |
from huggingface_hub import HfApi, HfFolder
|
| 6 |
from peft import LoraConfig, get_peft_model
|
| 7 |
|
| 8 |
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
|
| 9 |
|
| 10 |
-
|
| 11 |
# 从环境变量加载 Hugging Face token
|
| 12 |
hf_token = os.getenv('HF_TOKEN') # 假设你将 token 设置为环境变量
|
| 13 |
if hf_token:
|
|
@@ -15,24 +14,19 @@ if hf_token:
|
|
| 15 |
else:
|
| 16 |
raise ValueError("Hugging Face token 未设置")
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
quantization_config = BitsAndBytesConfig(load_in_4bit=True) # 使用 4-bit 量化,避免 fp8 错误
|
| 21 |
-
|
| 22 |
-
# 加载基础模型(例如:DeepSeek-R1)
|
| 23 |
-
model_name = "deepseek-ai/DeepSeek-R1" # 你可以根据需要调整基础模型
|
| 24 |
-
# ✅ 允许远程执行自定义代码,并启用 4-bit 量化
|
| 25 |
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
model_name,
|
| 27 |
trust_remote_code=True,
|
| 28 |
-
token=hf_token
|
| 29 |
-
quantization_config=quantization_config # 指定 4-bit 量化
|
| 30 |
)
|
| 31 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 32 |
model_name,
|
| 33 |
trust_remote_code=True,
|
| 34 |
token=hf_token
|
| 35 |
)
|
|
|
|
| 36 |
# 读取数据文件
|
| 37 |
with open('data.json', 'r', encoding='utf-8') as f:
|
| 38 |
data = json.load(f)
|
|
@@ -80,5 +74,3 @@ trainer.train()
|
|
| 80 |
# 保存模型
|
| 81 |
model.save_pretrained("./fst-nnn")
|
| 82 |
tokenizer.save_pretrained("./fst-nnn")
|
| 83 |
-
|
| 84 |
-
# 上传到 Hugging Face
|
|
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
+
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments, AutoTokenizer
|
| 4 |
from datasets import Dataset
|
| 5 |
from huggingface_hub import HfApi, HfFolder
|
| 6 |
from peft import LoraConfig, get_peft_model
|
| 7 |
|
| 8 |
os.environ['HF_HOME'] = '/tmp/huggingface_cache'
|
| 9 |
|
|
|
|
| 10 |
# 从环境变量加载 Hugging Face token
|
| 11 |
hf_token = os.getenv('HF_TOKEN') # 假设你将 token 设置为环境变量
|
| 12 |
if hf_token:
|
|
|
|
| 14 |
else:
|
| 15 |
raise ValueError("Hugging Face token 未设置")
|
| 16 |
|
| 17 |
+
# 加载基础模型(DeepSeek-R1)
|
| 18 |
+
model_name = "deepseek-ai/DeepSeek-R1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
model = AutoModelForCausalLM.from_pretrained(
|
| 20 |
model_name,
|
| 21 |
trust_remote_code=True,
|
| 22 |
+
token=hf_token # 旧版 `use_auth_token` 被弃用
|
|
|
|
| 23 |
)
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 25 |
model_name,
|
| 26 |
trust_remote_code=True,
|
| 27 |
token=hf_token
|
| 28 |
)
|
| 29 |
+
|
| 30 |
# 读取数据文件
|
| 31 |
with open('data.json', 'r', encoding='utf-8') as f:
|
| 32 |
data = json.load(f)
|
|
|
|
| 74 |
# 保存模型
|
| 75 |
model.save_pretrained("./fst-nnn")
|
| 76 |
tokenizer.save_pretrained("./fst-nnn")
|
|
|
|
|
|