Qrverse committed (verified)
Commit a73c6e8 · 1 Parent(s): a639f97

Upload train-round4-hf-jobs.py with huggingface_hub

Files changed (1): train-round4-hf-jobs.py (+293, −0)
train-round4-hf-jobs.py ADDED
@@ -0,0 +1,293 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "unsloth",
#     "datasets>=3.0",
#     "huggingface_hub>=0.25",
#     "trl>=0.15",
#     "transformers>=4.50",
#     "trackio",
# ]
# ///
"""
QR-Verse AI — Round 4 Fine-Tuning (HuggingFace Jobs)
=====================================================

Round 4: MEGA dataset focused on CONCISE, DIRECT responses.
6,000 examples across 10 categories:
- concise_responses (1200): Short 1-3 sentence answers
- multi_turn (900): Natural 3-5 turn conversations
- tool_use (800): All 18 tools, single and multi-tool
- business_scenarios (600): Industry-specific (8 industries)
- ai_art_expertise (500): Presets, fusion, quality
- site_health (500): 8 site health tools
- error_handling (400): Graceful error recovery
- personality (400): Warm but concise
- competitor_knowledge (400): QR-Verse vs competitors
- quick_actions (300): Ultra-short tool exchanges

Changes from Round 3:
- 6,000 examples (up from 3,766)
- 10 categories (up from 5)
- Focus on CONCISE responses (no filler phrases)
- Multi-turn conversations (3-5 turns)
- Competitor knowledge
- Cultural language sensitivity

Usage:
    hf jobs uv run --flavor a10g-large --timeout 5h \
        --secrets HF_TOKEN \
        https://huggingface.co/Qrverse/qr-verse-ai-lora/resolve/main/train-round4-hf-jobs.py
"""

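# The "# /// script" block at the top is PEP 723 inline script metadata:
# `uv run` (which `hf jobs uv run` invokes on the job machine) reads it and
# resolves these dependencies into an ephemeral environment before executing
# this single file.
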
import os
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# 1. Configuration
# ---------------------------------------------------------------------------

BASE_MODEL = "unsloth/Qwen3-VL-8B-Instruct"
DATASET_ID = "QRVerse/qr-verse-training-data"
DATA_FILE = "training-data-round4.jsonl"
OUTPUT_REPO = "Qrverse/qr-verse-ai-lora"

# Training hyperparameters
TRAIN_EPOCHS = int(os.environ.get("TRAIN_EPOCHS", "3"))

# LoRA configuration — same rank as R3, proven capacity for 18 tools
LORA_RANK = 64
LORA_ALPHA = 128  # 2x rank
LORA_DROPOUT = 0.05

# Training configuration
LEARNING_RATE = 2e-6  # Lower for R4 (more data: 6000 vs 3766)
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4  # effective batch size = 16
MAX_SEQ_LENGTH = 4096
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 0.01
LOGGING_STEPS = 10

OUTPUT_DIR = "./qr-verse-lora-output-r4"

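# Rough step math for this run, assuming the full 6,000-example R4 file and
# no packing: 6000 / (4 * 4 effective batch) = 375 optimizer steps per epoch,
# so 3 epochs is ~1125 steps, of which ~56 (warmup_ratio 0.05) warm up the LR.
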
SYSTEM_PROMPT = (
    "You are QR-Verse AI, a helpful assistant for the QR-Verse platform. "
    "You help users create, customize, and manage QR codes. You can generate "
    "QR codes for URLs, WiFi networks, vCards, email, SMS, and 25+ other types. "
    "You also support AI-powered QR code art generation with 157+ style presets, "
    "and comprehensive site health monitoring including SSL, SEO, Core Web Vitals, "
    "broken link scanning, and hreflang validation. "
    "Always be concise, accurate, and helpful."
)


# ---------------------------------------------------------------------------
# 2. Load base model with Unsloth (4-bit QLoRA)
# ---------------------------------------------------------------------------

logger.info("Loading base model: %s (4-bit QLoRA via Unsloth)", BASE_MODEL)

from unsloth import FastVisionModel

model, tokenizer = FastVisionModel.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,
    max_seq_length=MAX_SEQ_LENGTH,
)

logger.info("Model loaded. Applying LoRA adapters (rank %d)...", LORA_RANK)


# ---------------------------------------------------------------------------
# 3. Apply LoRA adapters (fresh — trains on full R4 dataset)
# ---------------------------------------------------------------------------

model = FastVisionModel.get_peft_model(
    model,
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
logger.info(
    "LoRA applied: %s trainable / %s total (%.2f%%)",
    f"{trainable_params:,}", f"{total_params:,}",
    100.0 * trainable_params / total_params,
)


# ---------------------------------------------------------------------------
# 4. Load Round 4 dataset
# ---------------------------------------------------------------------------

logger.info("Loading dataset: %s / %s", DATASET_ID, DATA_FILE)

from datasets import load_dataset

dataset = load_dataset(DATASET_ID, data_files=DATA_FILE, split="train")
logger.info("Dataset loaded: %d examples", len(dataset))

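# Each record is expected to carry an OpenAI-style "messages" list (the key
# the formatter below reads); a sketch of the assumed shape, since the actual
# private dataset schema may differ:
#   {"messages": [
#       {"role": "system", "content": "You are QR-Verse AI, ..."},
#       {"role": "user", "content": "WiFi QR for my cafe, network CafeNet"},
#       {"role": "assistant", "content": "..."}]}
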

# ---------------------------------------------------------------------------
# 5. Format conversations with tokenizer chat template
# ---------------------------------------------------------------------------

logger.info("Formatting conversations...")

def format_conversations(examples):
    texts = []
    for messages in examples["messages"]:
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False,
        )
        texts.append(text)
    return {"text": texts}

dataset = dataset.map(
    format_conversations, batched=True,
    remove_columns=dataset.column_names, desc="Applying chat template",
)

logger.info("Dataset formatted: %d examples", len(dataset))

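# Cheap sanity check (added here as a sketch): log one truncated sample so the
# job logs show exactly what the chat template produced before training runs.
logger.info("Sample formatted text (truncated): %s", dataset[0]["text"][:300])
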

# ---------------------------------------------------------------------------
# 6. Configure SFTTrainer
# ---------------------------------------------------------------------------

logger.info("Configuring SFTTrainer (Round 4 — CONCISE focus)...")

from trl import SFTTrainer, SFTConfig

sft_config = SFTConfig(
    output_dir=OUTPUT_DIR,
    save_strategy="epoch",
    num_train_epochs=TRAIN_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=WARMUP_RATIO,
    weight_decay=WEIGHT_DECAY,
    bf16=True,
    fp16=False,
    max_seq_length=MAX_SEQ_LENGTH,
    logging_steps=LOGGING_STEPS,
    logging_first_step=True,
    report_to="trackio",
    run_name="qr-verse-ai-round4",
    dataset_text_field="text",
    packing=False,
    push_to_hub=True,
    hub_model_id=OUTPUT_REPO,
    hub_strategy="every_save",
    hub_private_repo=True,
    seed=42,
    data_seed=42,
    remove_unused_columns=True,
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,  # TRL renamed the `tokenizer` kwarg to `processing_class`
    train_dataset=dataset,
    args=sft_config,
)

logger.info(
    "SFTTrainer: %d epochs, lr=%.0e, batch=%d (eff=%d), %d examples, LoRA r=%d",
    TRAIN_EPOCHS, LEARNING_RATE,
    BATCH_SIZE, BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS,
    len(dataset), LORA_RANK,
)


# ---------------------------------------------------------------------------
# 7. Train
# ---------------------------------------------------------------------------

logger.info("Starting Round 4 training (CONCISE focus, 6000 examples)...")
train_result = trainer.train()


# ---------------------------------------------------------------------------
# 8. Log metrics
# ---------------------------------------------------------------------------

metrics = train_result.metrics
logger.info("=" * 60)
logger.info("ROUND 4 TRAINING COMPLETE")
logger.info("=" * 60)
logger.info("  Train loss:  %.4f", metrics.get("train_loss", 0))
logger.info("  Runtime:     %.1f seconds", metrics.get("train_runtime", 0))
logger.info("  Samples/sec: %.2f", metrics.get("train_samples_per_second", 0))
logger.info("  Epochs:      %d", TRAIN_EPOCHS)
logger.info("  LoRA rank:   %d", LORA_RANK)
logger.info("=" * 60)


# ---------------------------------------------------------------------------
# 9. Save and push LoRA adapter
# ---------------------------------------------------------------------------

LOCAL_ADAPTER_DIR = os.path.join(OUTPUT_DIR, "final-adapter-r4")
logger.info("Saving LoRA adapter: %s", LOCAL_ADAPTER_DIR)
model.save_pretrained(LOCAL_ADAPTER_DIR)
tokenizer.save_pretrained(LOCAL_ADAPTER_DIR)

logger.info("Pushing Round 4 adapter to Hub: %s", OUTPUT_REPO)
commit_msg = (
    f"Round 4 LoRA: {len(dataset)} examples, {TRAIN_EPOCHS} epochs, "
    f"lr {LEARNING_RATE}, rank {LORA_RANK}, CONCISE focus"
)

# Try push_to_hub first; fall back to upload_folder if LFS returns a 403
try:
    model.push_to_hub(
        OUTPUT_REPO, tokenizer=tokenizer,
        commit_message=commit_msg,
        private=False,
    )
    logger.info("Round 4 adapter pushed via push_to_hub")
except Exception as e:
    logger.warning("push_to_hub failed: %s — trying upload_folder fallback...", e)
    from huggingface_hub import HfApi
    api = HfApi()
    api.upload_folder(
        folder_path=LOCAL_ADAPTER_DIR,
        repo_id=OUTPUT_REPO,
        repo_type="model",
        commit_message=commit_msg,
    )
    logger.info("Round 4 adapter pushed via upload_folder fallback")

logger.info("Round 4 adapter on Hub: https://huggingface.co/%s", OUTPUT_REPO)

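# Later smoke test, in a separate session (a sketch, assuming Unsloth can load
# a LoRA adapter repo directly via from_pretrained):
#   from unsloth import FastVisionModel
#   model, tokenizer = FastVisionModel.from_pretrained(
#       "Qrverse/qr-verse-ai-lora", load_in_4bit=True)
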
print("\n" + "=" * 60)
print("ROUND 4 COMPLETE")
print("=" * 60)
print(f"  Dataset:    {len(dataset)} examples ({DATA_FILE})")
print(f"  LoRA:       rank {LORA_RANK}, alpha {LORA_ALPHA}")
print(f"  LR:         {LEARNING_RATE}")
print(f"  Final loss: {metrics.get('train_loss', 'N/A')}")
print(f"  Hub:        https://huggingface.co/{OUTPUT_REPO}")
print()
print("Next steps:")
print("  1. Run convert-gguf-hf-jobs.py (F16 GGUF)")
print("  2. Run quantize-gguf-hf-jobs.py (Q4_K_M)")
print("  3. Download Q4_K_M + Modelfile -> ollama create qr-verse-ai")
print("=" * 60)
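
# The Modelfile in step 3 presumably looks roughly like this (illustrative
# sketch only; filename is hypothetical, and the real Modelfile ships
# alongside the GGUF in the Hub repo):
#   FROM ./qr-verse-ai-q4_k_m.gguf
#   SYSTEM """You are QR-Verse AI, ..."""
#   PARAMETER temperature 0.7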