Qrverse committed
Commit 7c4d692 · verified · Parent: 9e7b788

Round 3 train script: push_to_hub fallback fix

Files changed (1): train-round3-hf-jobs.py (ADDED, +285 −0)
@@ -0,0 +1,285 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "unsloth",
#     "datasets>=3.0",
#     "huggingface_hub>=0.25",
#     "trl>=0.15",
#     "transformers>=4.50",
#     "trackio",
# ]
# ///
"""
QR-Verse AI — Round 3 Fine-Tuning (HuggingFace Jobs)
=====================================================

Round 3: Expanded dataset with 5 new categories:
- tool_use_advanced: Multi-step tool chains (2-3 tools)
- multilingual: Native conversations in all 7 locales
- edge_cases: Adversarial inputs, error handling
- domain_knowledge: Updated QR types, AI art, pricing
- site_health: 8 site health monitoring tools

Changes from Round 2:
- LoRA rank 32 → 64 (more capacity for 18 tools)
- ~11,234 total examples (7,454 R2 + 3,780 R3)
- 18 tools (10 QR core + 8 site health)
- A100 GPU (HF Pro plan)

Usage:
    hf jobs uv run --flavor a100-large --timeout 4h \
        --secrets HF_TOKEN \
        https://huggingface.co/Qrverse/qr-verse-ai-lora/resolve/main/train-round3-hf-jobs.py
"""

import os
import json
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# 1. Configuration
# ---------------------------------------------------------------------------

BASE_MODEL = "unsloth/Qwen3-VL-8B-Instruct"
DATASET_ID = "QRVerse/qr-verse-training-data"
DATA_FILE = "training-data-round3.jsonl"
OUTPUT_REPO = "Qrverse/qr-verse-ai-lora"

# Training hyperparameters
TRAIN_EPOCHS = int(os.environ.get("TRAIN_EPOCHS", "3"))

# LoRA configuration — increased rank for 18 tools
LORA_RANK = 64       # up from 32 in R2
LORA_ALPHA = 128     # 2x rank
LORA_DROPOUT = 0.05

# Training configuration
LEARNING_RATE = 3e-6  # Lower for R3 (was 5e-6 in R2, 1e-5 in R1)
BATCH_SIZE = 4        # A100 has more VRAM, can use larger batch
GRADIENT_ACCUMULATION_STEPS = 4  # effective batch size = 16
MAX_SEQ_LENGTH = 4096
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 0.01
LOGGING_STEPS = 10

OUTPUT_DIR = "./qr-verse-lora-output-r3"

SYSTEM_PROMPT = (
    "You are QR-Verse AI, a helpful assistant for the QR-Verse platform. "
    "You help users create, customize, and manage QR codes. You can generate "
    "QR codes for URLs, WiFi networks, vCards, email, SMS, and 25+ other types. "
    "You also support AI-powered QR code art generation with 157+ style presets, "
    "and comprehensive site health monitoring including SSL, SEO, Core Web Vitals, "
    "broken link scanning, and hreflang validation. "
    "Always be concise, accurate, and helpful."
)
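
# Sketch added for illustration (not in the original commit): fail fast when
# the Hub token is missing, so the push in step 9 does not fail after hours
# of training. HF Jobs exposes the --secrets value as an environment variable.
if not (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")):
    logger.warning("No HF_TOKEN in environment; the final Hub push may fail.")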


# ---------------------------------------------------------------------------
# 2. Load base model with Unsloth (4-bit QLoRA)
# ---------------------------------------------------------------------------

logger.info("Loading base model: %s (4-bit QLoRA via Unsloth)", BASE_MODEL)

from unsloth import FastVisionModel

model, tokenizer = FastVisionModel.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,
    max_seq_length=MAX_SEQ_LENGTH,
)

logger.info("Model loaded. Applying LoRA adapters (rank %d)...", LORA_RANK)


# ---------------------------------------------------------------------------
# 3. Apply LoRA adapters (fresh — trains on full R3 dataset)
# ---------------------------------------------------------------------------

model = FastVisionModel.get_peft_model(
    model,
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(
    "LoRA applied: %s trainable / %s total (%.2f%%)",
    f"{trainable_params:,}", f"{total_params:,}",
    100.0 * trainable_params / total_params,
)
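
# Note added for illustration (not in the original commit): with
# LORA_ALPHA = 2 * LORA_RANK the effective LoRA scaling (alpha / r) stays at
# 2.0, so the rank bump from 32 adds adapter capacity without changing the
# magnitude of the low-rank update (assuming R2 also used alpha = 2 * r).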


# ---------------------------------------------------------------------------
# 4. Load Round 3 dataset
# ---------------------------------------------------------------------------

logger.info("Loading dataset: %s / %s", DATASET_ID, DATA_FILE)

from datasets import load_dataset

dataset = load_dataset(DATASET_ID, data_files=DATA_FILE, split="train")
logger.info("Dataset loaded: %d examples", len(dataset))


# ---------------------------------------------------------------------------
# 5. Format conversations with tokenizer chat template
# ---------------------------------------------------------------------------

logger.info("Formatting conversations...")

def format_conversations(examples):
    texts = []
    for messages in examples["messages"]:
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False,
        )
        texts.append(text)
    return {"text": texts}

dataset = dataset.map(
    format_conversations, batched=True,
    remove_columns=dataset.column_names, desc="Applying chat template",
)

logger.info("Dataset formatted: %d examples", len(dataset))


# ---------------------------------------------------------------------------
# 6. Configure SFTTrainer
# ---------------------------------------------------------------------------

logger.info("Configuring SFTTrainer (Round 3)...")

from trl import SFTTrainer, SFTConfig

sft_config = SFTConfig(
    output_dir=OUTPUT_DIR,
    save_strategy="epoch",
    num_train_epochs=TRAIN_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=WARMUP_RATIO,
    weight_decay=WEIGHT_DECAY,
    bf16=True,
    fp16=False,
    max_seq_length=MAX_SEQ_LENGTH,
    logging_steps=LOGGING_STEPS,
    logging_first_step=True,
    report_to="trackio",
    run_name="qr-verse-ai-round3",
    dataset_text_field="text",
    packing=False,
    push_to_hub=True,
    hub_model_id=OUTPUT_REPO,
    hub_strategy="every_save",
    hub_private_repo=True,
    seed=42,
    data_seed=42,
    remove_unused_columns=True,
)

trainer = SFTTrainer(
    model=model, tokenizer=tokenizer,
    train_dataset=dataset, args=sft_config,
)

logger.info(
    "SFTTrainer: %d epochs, lr=%.0e, batch=%d (eff=%d), %d examples, LoRA r=%d",
    TRAIN_EPOCHS, LEARNING_RATE,
    BATCH_SIZE, BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS,
    len(dataset), LORA_RANK,
)
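
# Sketch added for illustration (not in the original commit): rough
# optimizer-step count, handy for sanity-checking warmup length and the
# cosine schedule (assumes a single GPU, i.e. world size 1).
effective_batch = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
steps_per_epoch = max(1, len(dataset) // effective_batch)
logger.info(
    "Estimated ~%d optimizer steps/epoch, ~%d total",
    steps_per_epoch, steps_per_epoch * TRAIN_EPOCHS,
)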


# ---------------------------------------------------------------------------
# 7. Train
# ---------------------------------------------------------------------------

logger.info("Starting Round 3 training...")
train_result = trainer.train()


# ---------------------------------------------------------------------------
# 8. Log metrics
# ---------------------------------------------------------------------------

metrics = train_result.metrics
logger.info("=" * 60)
logger.info("ROUND 3 TRAINING COMPLETE")
logger.info("=" * 60)
logger.info("  Train loss:  %.4f", metrics.get("train_loss", 0))
logger.info("  Runtime:     %.1f seconds", metrics.get("train_runtime", 0))
logger.info("  Samples/sec: %.2f", metrics.get("train_samples_per_second", 0))
logger.info("  Epochs:      %d", TRAIN_EPOCHS)
logger.info("  LoRA rank:   %d", LORA_RANK)
logger.info("=" * 60)


# ---------------------------------------------------------------------------
# 9. Save and push LoRA adapter
# ---------------------------------------------------------------------------

LOCAL_ADAPTER_DIR = os.path.join(OUTPUT_DIR, "final-adapter-r3")
logger.info("Saving LoRA adapter: %s", LOCAL_ADAPTER_DIR)
model.save_pretrained(LOCAL_ADAPTER_DIR)
tokenizer.save_pretrained(LOCAL_ADAPTER_DIR)

logger.info("Pushing Round 3 adapter to Hub: %s", OUTPUT_REPO)
commit_msg = f"Round 3 LoRA: {len(dataset)} examples, {TRAIN_EPOCHS} epochs, lr {LEARNING_RATE}, rank {LORA_RANK}"

# Try push_to_hub first; fall back to upload_folder if the Hub rejects the
# LFS upload (HTTP 403)
try:
    model.push_to_hub(
        OUTPUT_REPO, tokenizer=tokenizer,
        commit_message=commit_msg,
        private=False,
    )
    logger.info("Round 3 adapter pushed via push_to_hub")
except Exception as e:
    logger.warning("push_to_hub failed: %s — trying upload_folder fallback...", e)
    from huggingface_hub import HfApi
    api = HfApi()
    api.upload_folder(
        folder_path=LOCAL_ADAPTER_DIR,
        repo_id=OUTPUT_REPO,
        repo_type="model",
        commit_message=commit_msg,
    )
    logger.info("Round 3 adapter pushed via upload_folder fallback")

logger.info("Round 3 adapter on Hub: https://huggingface.co/%s", OUTPUT_REPO)

print("\n" + "=" * 60)
print("ROUND 3 COMPLETE")
print("=" * 60)
print(f"  Dataset:    {len(dataset)} examples ({DATA_FILE})")
print(f"  LoRA:       rank {LORA_RANK}, alpha {LORA_ALPHA}")
print(f"  LR:         {LEARNING_RATE}")
print(f"  Final loss: {metrics.get('train_loss', 'N/A')}")
print(f"  Hub:        https://huggingface.co/{OUTPUT_REPO}")
print()
print("Next steps:")
print("  1. Run convert-gguf-hf-jobs.py (F16 GGUF)")
print("  2. Run quantize-gguf-hf-jobs.py (Q4_K_M)")
print("  3. Download Q4_K_M + Modelfile -> ollama create qr-verse-ai")
print("=" * 60)