rahul7star committed on
Commit
da6d1f1
·
verified ·
1 Parent(s): 4bb337f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -104,15 +104,26 @@ def train_model(base_model, dataset_name, num_epochs, batch_size, learning_rate,
104
  log_message(output_log, f" Columns: {train_dataset.column_names}")
105
 
106
  # ===== Format examples =====
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def format_example(item):
108
- text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
109
- prompt = f"""<|system|>
110
- You are a wise teacher interpreting Bhagavad Gita with deep insights.
111
- <|user|>
112
- {text}
113
- <|assistant|>
114
- """
115
- return {"text": prompt}
116
 
117
  train_dataset = train_dataset.map(format_example)
118
  test_dataset = test_dataset.map(format_example)
@@ -177,7 +188,7 @@ You are a wise teacher interpreting Bhagavad Gita with deep insights.
177
  fp16=device == "cuda",
178
  optim="adamw_torch",
179
  learning_rate=learning_rate,
180
- max_steps=100, # Limit for demo
181
  )
182
 
183
  trainer = Trainer(
 
104
  log_message(output_log, f" Columns: {train_dataset.column_names}")
105
 
106
  # ===== Format examples =====
107
+ # def format_example(item):
108
+ # text = item.get("text") or item.get("content") or " ".join(str(v) for v in item.values())
109
+ # prompt = f"""<|system|>
110
+ # You are a wise teacher interpreting Bhagavad Gita with deep insights.
111
+ # <|user|>
112
+ # {text}
113
+ # <|assistant|>
114
+ # """
115
+ # return {"text": prompt}
116
+
117
+
118
+
119
+ # ===== Format examples dynamically =====
120
  def format_example(item):
121
+ text_content = item.get("text") or item.get("content") or str(item.get("path", "")) or " ".join(str(v) for v in item.values())
122
+ # Use shorter, clean system prompt + user content for better loss
123
+ prompt = (
124
+ f"<|system|>\nYou are an expert AI assistant.\n<|user|>\n{text_content}\n<|assistant|>\n"
125
+ )
126
+ return {"text": prompt}
 
 
127
 
128
  train_dataset = train_dataset.map(format_example)
129
  test_dataset = test_dataset.map(format_example)
 
188
  fp16=device == "cuda",
189
  optim="adamw_torch",
190
  learning_rate=learning_rate,
191
+ max_steps=500, # Limit for demo is 100
192
  )
193
 
194
  trainer = Trainer(