Qwen-Training

Running

rahul7star committed on Oct 17

Commit

da6d1f1

verified ·

1 Parent(s): 4bb337f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -104,15 +104,26 @@ def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate,
         log_message(output_log, f"   Columns: {train_dataset.column_names}")
         # ===== Format examples =====
         def format_example(item):
-            text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
-            prompt = f"""<|system|>
-You are a wise teacher interpreting Bhagavad Gita with deep insights.
-<|user|>
-{text}
-<|assistant|>
-"""
-            return {"text": prompt}
         train_dataset = train_dataset.map(format_example)
         test_dataset = test_dataset.map(format_example)
@@ -177,7 +188,7 @@ You are a wise teacher interpreting Bhagavad Gita with deep insights.
             fp16=device == "cuda",
             optim="adamw_torch",
             learning_rate=learning_rate,
-            max_steps=100,  # Limit for demo
         )
         trainer = Trainer(

         log_message(output_log, f"   Columns: {train_dataset.column_names}")
         # ===== Format examples =====
+#         def format_example(item):
+#             text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
+#             prompt = f"""<|system|>
+# You are a wise teacher interpreting Bhagavad Gita with deep insights.
+# <|user|>
+# {text}
+# <|assistant|>
+# """
+#             return {"text": prompt}
+        # ===== Format examples dynamically =====
         def format_example(item):
+            text_content = item.get("text") or item.get("content") or str(item.get("path", "")) or " ".join(str(v) for v in item.values())
+            # Use a shorter, generic system prompt followed by the user content (intended to lower training loss)
+            prompt = (
+                f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n"
+            )
+            return {"text": prompt}
         train_dataset = train_dataset.map(format_example)
         test_dataset = test_dataset.map(format_example)
             fp16=device == "cuda",
             optim="adamw_torch",
             learning_rate=learning_rate,
+            max_steps=500,  # Cap training at 500 steps (the demo previously used 100)
         )
         trainer = Trainer(