Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -104,15 +104,26 @@ def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate,
|
|
| 104 |
log_message(output_log, f" Columns: {train_dataset.column_names}")
|
| 105 |
|
| 106 |
# ===== Format examples =====
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
def format_example(item):
|
| 108 |
-
|
| 109 |
-
prompt
|
| 110 |
-
|
| 111 |
-
<|user
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
"""
|
| 115 |
-
return {"text": prompt}
|
| 116 |
|
| 117 |
train_dataset = train_dataset.map(format_example)
|
| 118 |
test_dataset = test_dataset.map(format_example)
|
|
@@ -177,7 +188,7 @@ You are a wise teacher interpreting Bhagavad Gita with deep insights.
|
|
| 177 |
fp16=device == "cuda",
|
| 178 |
optim="adamw_torch",
|
| 179 |
learning_rate=learning_rate,
|
| 180 |
-
max_steps=
|
| 181 |
)
|
| 182 |
|
| 183 |
trainer = Trainer(
|
|
|
|
| 104 |
log_message(output_log, f" Columns: {train_dataset.column_names}")
|
| 105 |
|
| 106 |
# ===== Format examples =====
|
| 107 |
+
# def format_example(item):
|
| 108 |
+
# text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
|
| 109 |
+
# prompt = f"""<|system|>
|
| 110 |
+
# You are a wise teacher interpreting Bhagavad Gita with deep insights.
|
| 111 |
+
# <|user|>
|
| 112 |
+
# {text}
|
| 113 |
+
# <|assistant|>
|
| 114 |
+
# """
|
| 115 |
+
# return {"text": prompt}
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ===== Format examples dynamically =====
|
| 120 |
def format_example(item):
    """Wrap one dataset row in the chat-style prompt template.

    The user content is the first non-empty candidate among the row's
    "text" field, its "content" field, and its "path" field (stringified).
    When none of those yields anything, every non-None value of the row is
    stringified and joined with single spaces.

    Args:
        item: Mapping for a single dataset row; it must provide ``get``
            and ``values``.

    Returns:
        A dict with a single "text" key holding the formatted prompt.
    """
    path_value = item.get("path")
    # Guard before stringifying: str(None) is the truthy string "None",
    # which would otherwise be used as the prompt body and would also
    # prevent the join fallback below from ever running.
    path_text = "" if path_value is None else str(path_value)

    text_content = (
        item.get("text")
        or item.get("content")
        or path_text
        # Last resort: use the whole row, skipping missing (None) values so
        # the literal word "None" does not end up in the prompt.
        or " ".join(str(value) for value in item.values() if value is not None)
    )

    # Short, generic system prompt followed by the row content.
    prompt = (
        f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n"
    )
    return {"text": prompt}
|
|
|
|
|
|
|
| 127 |
|
| 128 |
train_dataset = train_dataset.map(format_example)
|
| 129 |
test_dataset = test_dataset.map(format_example)
|
|
|
|
| 188 |
fp16=device == "cuda",
|
| 189 |
optim="adamw_torch",
|
| 190 |
learning_rate=learning_rate,
|
| 191 |
+
max_steps=500,  # Cap training at 500 steps for the demo
|
| 192 |
)
|
| 193 |
|
| 194 |
trainer = Trainer(
|