eyalnof123 committed on
Commit
b2938e0
·
verified ·
1 Parent(s): 0b85916

Upload train.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train.py +65 -75
train.py CHANGED
@@ -1,107 +1,97 @@
1
  #!/usr/bin/env python3
2
- import os
3
  os.environ["WANDB_DISABLED"] = "true"
4
 
5
- print("=== Installing dependencies ===")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import subprocess
7
  subprocess.check_call(["pip", "install", "-q", "unsloth", "trl", "datasets", "peft", "accelerate", "bitsandbytes"])
8
 
9
  print("=== Loading model ===")
 
10
  from unsloth import FastLanguageModel
11
-
12
- model, tokenizer = FastLanguageModel.from_pretrained(
13
- model_name="google/functiongemma-270m-it",
14
- max_seq_length=4096,
15
- load_in_4bit=False,
16
- )
17
 
18
  print("=== Applying LoRA ===")
19
- model = FastLanguageModel.get_peft_model(
20
- model,
21
- r=32,
22
- lora_alpha=64,
23
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
24
- lora_dropout=0,
25
- bias="none",
26
- use_gradient_checkpointing="unsloth",
27
- )
28
 
29
  print("=== Loading dataset ===")
 
30
  from datasets import load_dataset
31
-
32
  dataset = load_dataset("eyalnof123/su-lab-functiongemma-dataset")
33
- train_dataset = dataset["train"] if "train" in dataset else dataset
34
-
35
- if "train" not in dataset:
36
- train_dataset = load_dataset("eyalnof123/su-lab-functiongemma-dataset", data_files="train.jsonl", split="train")
37
-
38
  print(f"Training examples: {len(train_dataset)}")
39
 
40
- print("=== Starting training ===")
 
41
  from trl import SFTTrainer, SFTConfig
42
 
43
- training_args = SFTConfig(
44
- output_dir="./output",
45
- num_train_epochs=3,
46
- per_device_train_batch_size=2,
47
- gradient_accumulation_steps=4,
48
- learning_rate=2e-4,
49
- weight_decay=0.01,
50
- lr_scheduler_type="linear",
51
- warmup_steps=5,
52
- logging_steps=10,
53
- save_strategy="epoch",
54
- bf16=True,
55
- fp16=False,
56
- optim="adamw_8bit",
57
- max_seq_length=4096,
58
- dataset_text_field="text",
59
- seed=42,
60
- )
61
-
62
- trainer = SFTTrainer(
63
- model=model,
64
- args=training_args,
65
- train_dataset=train_dataset,
66
- tokenizer=tokenizer,
67
- )
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  trainer.train()
70
 
71
- print("=== Saving model ===")
 
72
  model.save_pretrained("./output/final")
73
  tokenizer.save_pretrained("./output/final")
74
 
75
  print("=== Pushing to Hub ===")
76
- from huggingface_hub import HfApi
 
77
  api = HfApi()
 
 
 
78
 
79
- try:
80
- from huggingface_hub import create_repo
81
- create_repo("eyalnof123/functiongemma-270m-su-lab", private=False)
82
- except:
83
- pass
84
-
85
- api.upload_folder(
86
- folder_path="./output/final",
87
- repo_id="eyalnof123/functiongemma-270m-su-lab",
88
- repo_type="model",
89
- )
90
-
91
- print("=== DONE! Model at: https://huggingface.co/eyalnof123/functiongemma-270m-su-lab ===")
92
-
93
- print("=== Exporting GGUF ===")
94
  try:
95
  model.save_pretrained_gguf("./output/gguf", tokenizer, quantization_method="q4_k_m")
96
- api.upload_folder(
97
- folder_path="./output/gguf",
98
- repo_id="eyalnof123/functiongemma-270m-su-lab",
99
- repo_type="model",
100
- path_in_repo="gguf",
101
- )
102
  print("GGUF uploaded!")
103
  except Exception as e:
104
- print(f"GGUF export failed (non-critical): {e}")
 
 
 
105
 
106
- print("ALL DONE")
107
- print("Model: https://huggingface.co/eyalnof123/functiongemma-270m-su-lab")
 
 
1
  #!/usr/bin/env python3
2
+ import os, threading
3
  os.environ["WANDB_DISABLED"] = "true"
4
 
5
+ # Health server so HF doesn't kill us for timeout
6
+ from http.server import HTTPServer, BaseHTTPRequestHandler
7
class H(BaseHTTPRequestHandler):
    """Minimal HTTP handler that reports the script's current progress.

    Every GET request is answered with status 200 and a small HTML page
    that shows the current value of ``H.status``.
    """

    # Progress text shown on the page. It is a class attribute, so assigning
    # ``H.status = "..."`` anywhere in the script changes what every
    # subsequent request sees.
    status = "starting"

    def do_GET(self):
        """Reply with an HTML page containing the current status string."""
        # Imported locally so this block needs no new file-level import.
        import html

        # Escape the status so characters such as "<" or "&" cannot break
        # the markup; quotes are left alone because this is element text.
        shown = html.escape(str(H.status), quote=False)
        page = f"<h1>FunctionGemma Training</h1><p>Status: {shown}</p>"

        self.send_response(200)
        # Declare the charset explicitly so it matches the encoding below.
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(page.encode("utf-8"))

    def log_message(self, *a):
        """Discard the per-request log lines the base class would emit."""
        pass
15
+ server = HTTPServer(("0.0.0.0", 7860), H)
16
+ threading.Thread(target=server.serve_forever, daemon=True).start()
17
+ print("Health server on :7860")
18
+
19
+ print("=== Installing ===")
20
+ H.status = "installing dependencies"
21
  import subprocess
22
  subprocess.check_call(["pip", "install", "-q", "unsloth", "trl", "datasets", "peft", "accelerate", "bitsandbytes"])
23
 
24
  print("=== Loading model ===")
25
+ H.status = "loading model"
26
  from unsloth import FastLanguageModel
27
+ model, tokenizer = FastLanguageModel.from_pretrained(model_name="google/functiongemma-270m-it", max_seq_length=4096, load_in_4bit=False)
 
 
 
 
 
28
 
29
  print("=== Applying LoRA ===")
30
+ H.status = "applying LoRA"
31
+ model = FastLanguageModel.get_peft_model(model, r=32, lora_alpha=64,
32
+ target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
33
+ lora_dropout=0, bias="none", use_gradient_checkpointing="unsloth")
 
 
 
 
 
34
 
35
  print("=== Loading dataset ===")
36
+ H.status = "loading dataset"
37
  from datasets import load_dataset
 
38
  dataset = load_dataset("eyalnof123/su-lab-functiongemma-dataset")
39
+ train_dataset = dataset["train"] if "train" in dataset else load_dataset("eyalnof123/su-lab-functiongemma-dataset", data_files="train.jsonl", split="train")
 
 
 
 
40
  print(f"Training examples: {len(train_dataset)}")
41
 
42
+ print("=== Training ===")
43
+ H.status = "training epoch 1/3"
44
  from trl import SFTTrainer, SFTConfig
45
 
46
class StatusCallback:
    """Plain object exposing an ``on_log`` hook that updates ``H.status``.

    NOTE(review): nothing visible in this file instantiates this class, and
    ``SC.on_log`` performs the same update — confirm whether it is still
    needed.
    """

    def on_log(self, args, state, control, logs=None, **kw):
        """Write the current step, total steps and epoch into ``H.status``."""
        # A falsy epoch (None or 0) is reported as 0.
        current_epoch = state.epoch if state.epoch else 0
        H.status = f"training step {state.global_step}/{state.max_steps} epoch {current_epoch:.1f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ training_args = SFTConfig(
52
+ output_dir="./output", num_train_epochs=3, per_device_train_batch_size=2,
53
+ gradient_accumulation_steps=4, learning_rate=2e-4, weight_decay=0.01,
54
+ lr_scheduler_type="linear", warmup_steps=5, logging_steps=10,
55
+ save_strategy="epoch", bf16=True, fp16=False, optim="adamw_8bit",
56
+ max_seq_length=4096, dataset_text_field="text", seed=42)
57
+
58
+ from transformers import TrainerCallback
59
class SC(TrainerCallback):
    """Trainer callback that mirrors training progress into ``H.status``."""

    def on_log(self, args, state, control, logs=None, **kw):
        """Record the current step, total steps and fractional epoch."""
        # A falsy epoch (None or 0) is reported as 0.
        epoch = state.epoch or 0
        H.status = f"training step {state.global_step}/{state.max_steps} epoch {epoch:.1f}"

    def on_epoch_end(self, args, state, control, **kw):
        """Record the epoch number reached when an epoch ends."""
        # Apply the same guard as on_log: int(None) would raise TypeError
        # and abort training just to update a status string.
        epoch = state.epoch or 0
        H.status = f"saving checkpoint epoch {int(epoch)}"
65
+
66
+ trainer = SFTTrainer(model=model, args=training_args, train_dataset=train_dataset,
67
+ tokenizer=tokenizer, callbacks=[SC()])
68
  trainer.train()
69
 
70
+ print("=== Saving ===")
71
+ H.status = "saving model"
72
  model.save_pretrained("./output/final")
73
  tokenizer.save_pretrained("./output/final")
74
 
75
  print("=== Pushing to Hub ===")
76
+ H.status = "uploading to hub"
77
+ from huggingface_hub import HfApi, create_repo
78
  api = HfApi()
79
# Make sure the target model repo exists. exist_ok=True turns the expected
# "repo already exists" case into a no-op, so the handler below only fires
# for real problems (bad token, network error, ...). Those are reported
# instead of being silently swallowed by a bare `except: pass`, which would
# also have caught KeyboardInterrupt/SystemExit.
try:
    create_repo("eyalnof123/functiongemma-270m-su-lab", private=False, exist_ok=True)
except Exception as e:
    # Stay best-effort like the original: the upload below is still tried.
    print(f"create_repo failed, attempting upload anyway: {e}")

# Upload the contents of ./output/final to the model repo.
api.upload_folder(
    folder_path="./output/final",
    repo_id="eyalnof123/functiongemma-270m-su-lab",
    repo_type="model",
)
82
 
83
+ print("=== GGUF ===")
84
+ H.status = "exporting GGUF"
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  try:
86
  model.save_pretrained_gguf("./output/gguf", tokenizer, quantization_method="q4_k_m")
87
+ api.upload_folder(folder_path="./output/gguf", repo_id="eyalnof123/functiongemma-270m-su-lab", repo_type="model", path_in_repo="gguf")
 
 
 
 
 
88
  print("GGUF uploaded!")
89
  except Exception as e:
90
+ print(f"GGUF failed: {e}")
91
+
92
+ H.status = "DONE! Model at https://huggingface.co/eyalnof123/functiongemma-270m-su-lab"
93
+ print(H.status)
94
 
95
+ # Keep alive so you can see the status
96
+ import time
97
+ while True: time.sleep(60)