Snow2222 committed on
Commit
0b272a1
·
verified ·
1 Parent(s): d6c06f6

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +5 -13
train.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  import json
3
- from transformers import AutoModelForCausalLM,AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer, BitsAndBytesConfig
4
  from datasets import Dataset
5
  from huggingface_hub import HfApi, HfFolder
6
  from peft import LoraConfig, get_peft_model
7
 
8
  os.environ['HF_HOME'] = '/tmp/huggingface_cache'
9
 
10
-
11
  # 从环境变量加载 Hugging Face token
12
  hf_token = os.getenv('HF_TOKEN') # 假设你将 token 设置为环境变量
13
  if hf_token:
@@ -15,24 +14,19 @@ if hf_token:
15
  else:
16
  raise ValueError("Hugging Face token 未设置")
17
 
18
-
19
- # 设置量化配置,避免 fp8 量化错误
20
- quantization_config = BitsAndBytesConfig(load_in_4bit=True) # 使用 4-bit 量化,避免 fp8 错误
21
-
22
- # 加载基础模型(例如:DeepSeek-R1)
23
- model_name = "deepseek-ai/DeepSeek-R1" # 你可以根据需要调整基础模型
24
- # ✅ 允许远程执行自定义代码,并启用 4-bit 量化
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
  trust_remote_code=True,
28
- token=hf_token, # 旧版 `use_auth_token` 被弃用
29
- quantization_config=quantization_config # 指定 4-bit 量化
30
  )
31
  tokenizer = AutoTokenizer.from_pretrained(
32
  model_name,
33
  trust_remote_code=True,
34
  token=hf_token
35
  )
 
36
  # 读取数据文件
37
  with open('data.json', 'r', encoding='utf-8') as f:
38
  data = json.load(f)
@@ -80,5 +74,3 @@ trainer.train()
80
  # 保存模型
81
  model.save_pretrained("./fst-nnn")
82
  tokenizer.save_pretrained("./fst-nnn")
83
-
84
- # 上传到 Hugging Face
 
1
  import os
2
  import json
3
+ from transformers import AutoModelForCausalLM, Trainer, TrainingArguments, AutoTokenizer
4
  from datasets import Dataset
5
  from huggingface_hub import HfApi, HfFolder
6
  from peft import LoraConfig, get_peft_model
7
 
8
  os.environ['HF_HOME'] = '/tmp/huggingface_cache'
9
 
 
10
  # 从环境变量加载 Hugging Face token
11
  hf_token = os.getenv('HF_TOKEN') # 假设你将 token 设置为环境变量
12
  if hf_token:
 
14
  else:
15
  raise ValueError("Hugging Face token 未设置")
16
 
17
+ # 加载基础模型(DeepSeek-R1)
18
+ model_name = "deepseek-ai/DeepSeek-R1"
 
 
 
 
 
19
  model = AutoModelForCausalLM.from_pretrained(
20
  model_name,
21
  trust_remote_code=True,
22
+ token=hf_token # 旧版 `use_auth_token` 被弃用
 
23
  )
24
  tokenizer = AutoTokenizer.from_pretrained(
25
  model_name,
26
  trust_remote_code=True,
27
  token=hf_token
28
  )
29
+
30
  # 读取数据文件
31
  with open('data.json', 'r', encoding='utf-8') as f:
32
  data = json.load(f)
 
74
  # 保存模型
75
  model.save_pretrained("./fst-nnn")
76
  tokenizer.save_pretrained("./fst-nnn")