DDDano333 committed on
Commit 85b01ed
1 Parent(s): 07e2f18

new config

Files changed (2)
  1. samples.json +0 -0
  2. train.py +39 -63
samples.json CHANGED
The diff for this file is too large to render. See raw diff
 
train.py CHANGED
@@ -1,30 +1,14 @@
 import os
 import torch
 import torch.nn as nn
-import bitsandbytes as bnb
 from datasets import load_dataset
 import transformers
 from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
 from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
-
-# Import the necessary Accelerate modules
-from accelerate import Accelerator, DistributedType
+from accelerate import Accelerator
+from torch.utils.data import DataLoader
 
 def train():
-    # Initialize the Accelerator
-    accelerator = Accelerator(
-        device_placement=True,
-        split_batches=False,
-        mixed_precision="fp16",
-        # distributed_type=DistributedType.MULTI_GPU,
-        gradient_accumulation_steps=1,
-        rng_types=["torch", "cuda"],
-        log_with=["tensorboard", "wandb", "comet_ml"],
-        project_dir="./",
-        even_batches=True,
-        step_scheduler_with_optimizer=True
-    )
-
     MICRO_BATCH_SIZE = 1
     BATCH_SIZE = 16
     GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
@@ -34,15 +18,11 @@ def train():
     LORA_ALPHA = 8
     LORA_DROPOUT = 0.05
 
-    device = accelerator.device
+    accelerator = Accelerator()
 
     model = LLaMAForCausalLM.from_pretrained(
-        "decapoda-research/llama-7b-hf",
-        load_in_8bit=True,
-        device_map="auto",
+        "decapoda-research/llama-7b-hf"
     )
-
-    model = accelerator.prepare(model)
     tokenizer = LLaMATokenizer.from_pretrained(
         "decapoda-research/llama-7b-hf", add_eos_token=True
     )
@@ -59,52 +39,48 @@ def train():
     )
     model = get_peft_model(model, config)
     tokenizer.pad_token_id = 0
-    data = load_dataset("json", data_files="../samples.json")
+    data = load_dataset("json", data_files="samples.json")
 
     def generate_prompt(data_point):
         if data_point["input"]:
-            return f"""### Instruction:
-{data_point["instruction"]}
-### Input:
-{data_point["input"]}
-### Response:
-{data_point["output"]}"""
+            prompt = f"""### Instruction:
+{data_point["instruction"]}
+### Input:
+{data_point["input"]}
+### Response:
+{data_point["output"]}"""
         else:
-            return f"""### Instruction:
-{data_point["instruction"]}
-### Response:
-{data_point["output"]}"""
+            prompt = f"""### Instruction:
+{data_point["instruction"]}
+### Response:
+{data_point["output"]}"""
 
-    data = data.shuffle().map(
-        lambda data_point: tokenizer(
-            generate_prompt(data_point),
-            truncation=False,
-            padding='longest',
-        )
-    )
+        input_tokens = tokenizer(prompt, truncation=False, padding='longest', return_tensors='pt')
+        output_tokens = tokenizer(data_point["output"], truncation=False, padding='longest', return_tensors='pt')
 
-    training_args = transformers.TrainingArguments(
-        per_device_train_batch_size=MICRO_BATCH_SIZE,
-        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
-        warmup_steps=100,
-        num_train_epochs=EPOCHS,
-        learning_rate=LEARNING_RATE,
-        logging_steps=1,
-        output_dir=f"lora-smartscraper-{accelerator.process_index}",
-        save_total_limit=3,
-    )
-    # training_args = accelerator.update_arguments(training_args)
+        return input_tokens, output_tokens["input_ids"].squeeze()
 
-    trainer = transformers.Trainer(
-        model=model,
-        train_dataset=data["train"],
-        args=training_args,
-        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
-    )
-    model.config.use_cache = False
-    trainer.train(resume_from_checkpoint=False)
+    data = data.shuffle().map(generate_prompt)
+
+    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
+    model, optimizer = accelerator.prepare(model, optimizer)
+
+    train_dataloader = DataLoader(data["train"], batch_size=MICRO_BATCH_SIZE, shuffle=True)
+    train_dataloader = accelerator.prepare(train_dataloader)
+
+    for epoch in range(EPOCHS):
+        for step, batch in enumerate(train_dataloader):
+            inputs, labels = batch
+            inputs_tensor = torch.tensor(inputs["input_ids"], dtype=torch.long).unsqueeze(0).to(accelerator.device)
+            outputs = model(inputs_tensor)
+            labels_tensor = torch.tensor(labels, dtype=torch.long).to(accelerator.device)
+            loss = nn.CrossEntropyLoss()(outputs.logits.view(-1, outputs.logits.size(-1)), labels_tensor.view(-1))
+
+            accelerator.backward(loss)
+            optimizer.step()
+            optimizer.zero_grad()
 
     model.save_pretrained(f"lora-smartscraper-{accelerator.process_index}")
 
-if __name__ == "__main__":
-    train()
+if __name__ == "__main__":
+    train()
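
A note on the new mapping step: `datasets.Dataset.map` expects the mapped function to return a dict of columns, so the committed `generate_prompt`, which returns a `(BatchEncoding, Tensor)` tuple, will raise a TypeError when `data.shuffle().map(generate_prompt)` runs. A minimal dict-returning sketch, not part of this commit (the `tokenize_prompt` name and `max_length` value are illustrative, and it assumes it sits inside `train()` where `tokenizer` is in scope):

def tokenize_prompt(data_point, max_length=512):
    # Same Alpaca-style template the commit builds in generate_prompt.
    if data_point["input"]:
        prompt = (
            f"### Instruction:\n{data_point['instruction']}\n"
            f"### Input:\n{data_point['input']}\n"
            f"### Response:\n{data_point['output']}"
        )
    else:
        prompt = (
            f"### Instruction:\n{data_point['instruction']}\n"
            f"### Response:\n{data_point['output']}"
        )
    # A BatchEncoding is dict-like, so Dataset.map accepts it as columns.
    # Fixed-length padding (unlike padding='longest', which is a no-op for
    # a single example) keeps every row the same length for batching later.
    return tokenizer(prompt, truncation=True, max_length=max_length, padding="max_length")

data = data.shuffle().map(tokenize_prompt)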
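
The manual loss in the committed loop is also shape-fragile: it compares logits over the full prompt against labels tokenized from `data_point["output"]` alone, so the flattened views disagree whenever prompt and output lengths differ. A sketch of the loop under the same assumptions, reusing the `DataCollatorForLanguageModeling(mlm=False)` collator the previous revision already used, so the model computes its own shifted causal-LM loss (column names follow the fields `generate_prompt` reads):

collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
train_dataloader = DataLoader(
    # Drop the raw string columns so the collator only sees token columns.
    data["train"].remove_columns(["instruction", "input", "output"]),
    batch_size=MICRO_BATCH_SIZE,
    shuffle=True,
    collate_fn=collator,
)
train_dataloader = accelerator.prepare(train_dataloader)

for epoch in range(EPOCHS):
    for batch in train_dataloader:
        # mlm=False makes the collator copy input_ids into labels (pads -> -100);
        # the model applies the one-token causal shift internally and returns loss.
        outputs = model(**batch)
        accelerator.backward(outputs.loss)
        optimizer.step()
        optimizer.zero_grad()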