unesco-data-ai committed on
Commit
8797ed2
·
verified ·
1 Parent(s): b133a83

Upload train_unesco_tagger.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_unesco_tagger.py +9 -20
train_unesco_tagger.py CHANGED
@@ -1,7 +1,6 @@
1
  # /// script
2
  # dependencies = [
3
  # "trl>=0.12.0",
4
- # "peft>=0.7.0",
5
  # "transformers>=4.36.0",
6
  # "accelerate>=0.24.0",
7
  # "trackio",
@@ -9,7 +8,6 @@
9
  # ///
10
 
11
  from datasets import load_dataset
12
- from peft import LoraConfig
13
  from trl import SFTTrainer, SFTConfig
14
 
15
  print("Loading dataset...")
@@ -20,15 +18,15 @@ eval_dataset = dataset["validation"]
20
  print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
21
 
22
  config = SFTConfig(
23
- output_dir="qwen2.5-3b-unesco-tagger",
24
  push_to_hub=True,
25
- hub_model_id="unesco-data-ai/qwen2.5-3b-unesco-tagger-v1",
26
  hub_strategy="every_save",
27
  num_train_epochs=3,
28
- per_device_train_batch_size=2,
29
- gradient_accumulation_steps=8,
30
  learning_rate=2e-5,
31
- max_length=2048,
32
  logging_steps=10,
33
  save_strategy="steps",
34
  save_steps=200,
@@ -37,27 +35,18 @@ config = SFTConfig(
37
  eval_steps=200,
38
  warmup_ratio=0.1,
39
  lr_scheduler_type="cosine",
 
40
  report_to="trackio",
41
  project="unesco-keyword-extraction",
42
- run_name="qwen2.5-3b-sft-v1",
43
- )
44
-
45
- peft_config = LoraConfig(
46
- r=16,
47
- lora_alpha=32,
48
- lora_dropout=0.05,
49
- bias="none",
50
- task_type="CAUSAL_LM",
51
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
52
  )
53
 
54
  print("Initializing trainer...")
55
  trainer = SFTTrainer(
56
- model="Qwen/Qwen2.5-3B-Instruct",
57
  train_dataset=train_dataset,
58
  eval_dataset=eval_dataset,
59
  args=config,
60
- peft_config=peft_config,
61
  )
62
 
63
  print("Starting training...")
@@ -66,4 +55,4 @@ trainer.train()
66
  print("Pushing to Hub...")
67
  trainer.push_to_hub()
68
 
69
- print("Complete!")
 
1
  # /// script
2
  # dependencies = [
3
  # "trl>=0.12.0",
 
4
  # "transformers>=4.36.0",
5
  # "accelerate>=0.24.0",
6
  # "trackio",
 
8
  # ///
9
 
10
  from datasets import load_dataset
 
11
  from trl import SFTTrainer, SFTConfig
12
 
13
  print("Loading dataset...")
 
18
  print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
19
 
20
  config = SFTConfig(
21
+ output_dir="lfm2.5-1.2b-unesco-tagger",
22
  push_to_hub=True,
23
+ hub_model_id="unesco-data-ai/lfm2.5-1.2b-unesco-tagger-v1",
24
  hub_strategy="every_save",
25
  num_train_epochs=3,
26
+ per_device_train_batch_size=4,
27
+ gradient_accumulation_steps=4,
28
  learning_rate=2e-5,
29
+ max_length=1024,
30
  logging_steps=10,
31
  save_strategy="steps",
32
  save_steps=200,
 
35
  eval_steps=200,
36
  warmup_ratio=0.1,
37
  lr_scheduler_type="cosine",
38
+ bf16=True,
39
  report_to="trackio",
40
  project="unesco-keyword-extraction",
41
+ run_name="lfm2.5-1.2b-sft-v1",
 
 
 
 
 
 
 
 
 
42
  )
43
 
44
  print("Initializing trainer...")
45
  trainer = SFTTrainer(
46
+ model="LiquidAI/LFM2.5-1.2B-Instruct",
47
  train_dataset=train_dataset,
48
  eval_dataset=eval_dataset,
49
  args=config,
 
50
  )
51
 
52
  print("Starting training...")
 
55
  print("Pushing to Hub...")
56
  trainer.push_to_hub()
57
 
58
+ print("Complete! Model at: https://huggingface.co/unesco-data-ai/lfm2.5-1.2b-unesco-tagger-v1")