Spaces:
Runtime error
Update train.py
Browse files
train.py
CHANGED
|
@@ -15,13 +15,24 @@ if hf_token:
|
|
| 15 |
else:
|
| 16 |
raise ValueError("Hugging Face token 未设置")
|
| 17 |
|
| 18 |
-
# 加载基础模型(例如:DeepSeek-R1)
|
| 19 |
-
model_name = "deepseek-ai/DeepSeek-R1" # 你可以根据需要调整基础模型
|
| 20 |
-
# ✅ 允许远程执行自定义代码
|
| 21 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, use_auth_token=hf_token)
|
| 22 |
|
| 23 |
-
|
|
|
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# 读取数据文件
|
| 26 |
with open('data.json', 'r', encoding='utf-8') as f:
|
| 27 |
data = json.load(f)
|
|
|
|
| 15 |
else:
|
| 16 |
raise ValueError("Hugging Face token 未设置")
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Set an explicit quantization config to avoid the fp8 quantization error
|
| 20 |
+
quantization_config = BitsAndBytesConfig(load_in_4bit=True) # 使用 4-bit 量化,避免 fp8 错误
|
| 21 |
|
| 22 |
+
# Load the base model (e.g. DeepSeek-R1)
|
| 23 |
+
model_name = "deepseek-ai/DeepSeek-R1" # 你可以根据需要调整基础模型
|
| 24 |
+
# Allow execution of the model repository's custom code (trust_remote_code) and enable 4-bit quantization
|
| 25 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
+
model_name,
|
| 27 |
+
trust_remote_code=True,
|
| 28 |
+
token=hf_token, # 旧版 `use_auth_token` 被弃用
|
| 29 |
+
quantization_config=quantization_config # 指定 4-bit 量化
|
| 30 |
+
)
|
| 31 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 32 |
+
model_name,
|
| 33 |
+
trust_remote_code=True,
|
| 34 |
+
token=hf_token
|
| 35 |
+
)
|
| 36 |
# 读取数据文件
|
| 37 |
with open('data.json', 'r', encoding='utf-8') as f:
|
| 38 |
data = json.load(f)
|