Qrverse committed on
Commit
ae0bc13
·
verified ·
1 Parent(s): f8f2d27

Round 2 training script (lower LR, expanded dataset)

Browse files
Files changed (1) hide show
  1. train-round2-hf-jobs.py +259 -0
train-round2-hf-jobs.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "unsloth",
5
+ # "datasets>=3.0",
6
+ # "huggingface_hub>=0.25",
7
+ # "trl>=0.15",
8
+ # "transformers>=4.50",
9
+ # "trackio",
10
+ # ]
11
+ # ///
12
+ """
13
+ QR-Verse AI — Round 2 Fine-Tuning (HuggingFace Jobs)
14
+ =====================================================
15
+
16
+ Trains a fresh LoRA adapter on the base model (the Round 1 adapter weights are not loaded) using the expanded dataset:
17
+ - Round 1 base: 7,300 examples
18
+ - ChromaDB knowledge: 41 user-facing knowledge examples
19
+ - AI art quality: ~100 vision/quality gate examples
20
+
21
+ Uses `training-data-round2.jsonl` from the dataset repo.
22
+
23
+ Usage:
24
+ hf jobs uv run --flavor a10g-small --timeout 2h \
25
+ --secrets HF_TOKEN \
26
+ https://huggingface.co/Qrverse/qr-verse-ai-lora/resolve/main/train-round2-hf-jobs.py
27
+ """
28
+
29
+ import os
30
+ import json
31
+ import logging
32
+
33
# Root logging setup: INFO and above, rendered as
# "<YYYY-MM-DD HH:MM:SS> [<LEVEL>] <message>".
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-level logger used by every step of the script.
logger = logging.getLogger(__name__)
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # 1. Configuration
43
+ # ---------------------------------------------------------------------------
44
+
45
def read_int_env(name, default, minimum=1):
    """Return environment variable *name* parsed as an integer.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, empty, or only
            whitespace.
        minimum: Smallest accepted value (inclusive).

    Returns:
        The parsed integer, or *default* when no value was supplied.

    Raises:
        ValueError: If the value is not an integer or is below *minimum*.
            The message names the offending variable.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        # A variable that is exported but left blank means "use the default",
        # not int("") -> ValueError.
        return default
    try:
        value = int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer, got {raw!r}") from None
    if value < minimum:
        raise ValueError(f"{name} must be >= {minimum}, got {value}")
    return value


# Hub identifiers: base checkpoint, training-data repo/file, and output repo.
BASE_MODEL = "unsloth/Qwen3-VL-8B-Instruct"
DATASET_ID = "QRVerse/qr-verse-training-data"
DATA_FILE = "training-data-round2.jsonl"
OUTPUT_REPO = "Qrverse/qr-verse-ai-lora"

# Training hyperparameters
# Overridable through the TRAIN_EPOCHS environment variable; must be >= 1 so
# a misconfigured job fails fast instead of pushing an untrained adapter.
TRAIN_EPOCHS = read_int_env("TRAIN_EPOCHS", 3)

# LoRA configuration (same as Round 1 for compatibility)
LORA_RANK = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.05

# Training configuration
LEARNING_RATE = 5e-6  # Lower LR for round 2 (was 1e-5 in Round 1)
BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 8  # effective batch size = 2 * 8 = 16
MAX_SEQ_LENGTH = 4096
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 0.01
LOGGING_STEPS = 10

# Local directory for trainer checkpoints and the final adapter.
OUTPUT_DIR = "./qr-verse-lora-output-r2"

# NOTE(review): SYSTEM_PROMPT is defined here but not referenced anywhere else
# in the visible script — confirm whether the dataset already embeds it.
SYSTEM_PROMPT = (
    "You are QR-Verse AI, a helpful assistant for the QR-Verse platform. "
    "You help users create, customize, and manage QR codes. You can generate "
    "QR codes for URLs, WiFi networks, vCards, email, SMS, and 20+ other types. "
    "You also support AI-powered QR code art generation with 157+ style presets. "
    "Always be concise, accurate, and helpful."
)
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # 2. Load base model with Unsloth (4-bit QLoRA)
80
+ # ---------------------------------------------------------------------------
81
+
82
logger.info("Loading base model: %s (4-bit QLoRA via Unsloth)", BASE_MODEL)

# Imported right before first use, after the log line above, so the log is
# emitted before the import runs.
from unsloth import FastVisionModel

# Keyword options for the loader: 4-bit loading and the sequence-length cap.
_load_options = {
    "load_in_4bit": True,
    "max_seq_length": MAX_SEQ_LENGTH,
}
model, tokenizer = FastVisionModel.from_pretrained(BASE_MODEL, **_load_options)

logger.info("Model loaded. Applying LoRA adapters (fresh — trains on full R2 dataset)...")
93
+
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # 3. Apply LoRA adapters
97
+ # ---------------------------------------------------------------------------
98
+
99
# Wrap the loaded model with LoRA adapters on the listed projection modules.
model = FastVisionModel.get_peft_model(
    model,
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

# Count parameters in one pass: every tensor adds to the total, and those
# with requires_grad set also add to the trainable count.
trainable_params = 0
total_params = 0
for _param in model.parameters():
    _size = _param.numel()
    total_params += _size
    if _param.requires_grad:
        trainable_params += _size

logger.info(
    "LoRA applied: %s trainable / %s total (%.2f%%)",
    f"{trainable_params:,}",
    f"{total_params:,}",
    100.0 * trainable_params / total_params,
)
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # 4. Load Round 2 dataset
124
+ # ---------------------------------------------------------------------------
125
+
126
logger.info("Loading dataset: %s / %s", DATASET_ID, DATA_FILE)

from datasets import load_dataset

# Load only the Round 2 JSONL file from the dataset repo as the "train" split.
dataset = load_dataset(
    DATASET_ID,
    split="train",
    data_files=DATA_FILE,
)
logger.info("Dataset loaded: %d examples", len(dataset))
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # 5. Format conversations with tokenizer chat template
136
+ # ---------------------------------------------------------------------------
137
+
138
logger.info("Formatting conversations...")


def format_conversations(examples):
    """Render every conversation of a batch into one training string.

    Args:
        examples: Batch mapping supplied by ``dataset.map(batched=True)``.
            Its ``"messages"`` entry is iterated as one message list per
            example.

    Returns:
        A dict ``{"text": [...]}`` holding one chat-template-rendered string
        per conversation, in input order. Rendering is done with
        ``tokenize=False`` and no generation prompt appended.
    """
    return {
        "text": [
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
            for conversation in examples["messages"]
        ]
    }


# Replace all original columns with the single rendered "text" column.
_source_columns = dataset.column_names
dataset = dataset.map(
    format_conversations,
    batched=True,
    remove_columns=_source_columns,
    desc="Applying chat template",
)

logger.info("Dataset formatted: %d examples", len(dataset))
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # 6. Configure SFTTrainer
159
+ # ---------------------------------------------------------------------------
160
+
161
logger.info("Configuring SFTTrainer (Round 2)...")

from trl import SFTTrainer, SFTConfig

sft_config = SFTConfig(
    # --- schedule and optimisation ---
    num_train_epochs=TRAIN_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=WARMUP_RATIO,
    weight_decay=WEIGHT_DECAY,
    # --- numeric precision ---
    bf16=True,
    fp16=False,
    # --- data handling ---
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    packing=False,
    remove_unused_columns=True,
    # --- logging / experiment tracking ---
    logging_steps=LOGGING_STEPS,
    logging_first_step=True,
    report_to="trackio",
    run_name="qr-verse-ai-round2",
    # --- checkpoints and Hub upload ---
    output_dir=OUTPUT_DIR,
    save_strategy="epoch",
    push_to_hub=True,
    hub_model_id=OUTPUT_REPO,
    hub_strategy="every_save",
    hub_private_repo=True,
    # --- reproducibility ---
    seed=42,
    data_seed=42,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=sft_config,
    train_dataset=dataset,
)

# The "batch" value in the log line is the effective batch size
# (per-device batch size times gradient-accumulation steps).
_effective_batch = BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
logger.info(
    "SFTTrainer: %d epochs, lr=%.0e, batch=%d, %d examples",
    TRAIN_EPOCHS,
    LEARNING_RATE,
    _effective_batch,
    len(dataset),
)
203
+
204
+
205
+ # ---------------------------------------------------------------------------
206
+ # 7. Train
207
+ # ---------------------------------------------------------------------------
208
+
209
logger.info("Starting Round 2 training...")
train_result = trainer.train()

# Summarise the metrics returned by the training run; a metric missing from
# the dict is logged as 0.
metrics = train_result.metrics
_rule = "=" * 60
logger.info(_rule)
logger.info("ROUND 2 TRAINING COMPLETE")
logger.info(_rule)
for _template, _metric_key in (
    (" Train loss: %.4f", "train_loss"),
    (" Runtime: %.1f seconds", "train_runtime"),
    (" Samples/sec: %.2f", "train_samples_per_second"),
):
    logger.info(_template, metrics.get(_metric_key, 0))
logger.info(" Epochs: %d", TRAIN_EPOCHS)
logger.info(_rule)
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # 9. Save and push LoRA adapter
230
+ # ---------------------------------------------------------------------------
231
+
232
# Write the adapter and the tokenizer side by side under the output directory.
LOCAL_ADAPTER_DIR = os.path.join(OUTPUT_DIR, "final-adapter-r2")
logger.info("Saving LoRA adapter: %s", LOCAL_ADAPTER_DIR)
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(LOCAL_ADAPTER_DIR)

# Upload to the private output repo; the commit message records dataset size,
# epoch count and learning rate for this run.
logger.info("Pushing Round 2 adapter to Hub: %s", OUTPUT_REPO)
_commit_message = (
    f"Round 2 LoRA: {len(dataset)} examples, {TRAIN_EPOCHS} epochs, lr {LEARNING_RATE}"
)
model.push_to_hub(
    OUTPUT_REPO,
    tokenizer=tokenizer,
    commit_message=_commit_message,
    private=True,
)

logger.info("Round 2 adapter pushed: https://huggingface.co/%s", OUTPUT_REPO)
245
+
246
# Final human-readable summary on stdout, emitted as one write with the same
# line sequence as individual print() calls would produce.
_banner = "=" * 60
_summary_lines = [
    "\n" + _banner,
    "ROUND 2 COMPLETE",
    _banner,
    f" Dataset: {len(dataset)} examples ({DATA_FILE})",
    f" LoRA: rank {LORA_RANK}, alpha {LORA_ALPHA}",
    f" LR: {LEARNING_RATE}",
    f" Final loss: {metrics.get('train_loss', 'N/A')}",
    f" Hub: https://huggingface.co/{OUTPUT_REPO}",
    "",
    "Next steps:",
    " 1. Run convert-gguf-hf-jobs.py (F16 GGUF)",
    " 2. Run quantize-gguf-hf-jobs.py (Q4_K_M)",
    " 3. Download Q4_K_M + Modelfile → ollama create qr-verse-ai",
    _banner,
]
print("\n".join(_summary_lines))