FlameF0X committed on
Commit
9d443a8
·
verified ·
1 Parent(s): 9f30647

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -76,6 +76,10 @@ def train_thread_target(
76
  full_repo_id = f"{org_name}/{model_name}"
77
  log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
78
 
 
 
 
 
79
  # 1. Load Dataset
80
  log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
81
  try:
@@ -253,7 +257,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray
253
  with gr.TabItem("3. Training Hyperparameters"):
254
  with gr.Row():
255
  epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
256
- lr = gr.Number(label="Learning Rate", value=5e-4, format="%.1e")
257
  with gr.Row():
258
  batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
259
  grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")
 
76
  full_repo_id = f"{org_name}/{model_name}"
77
  log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
78
 
79
+ # Validation for Transformer logic
80
+ if n_embd % n_head != 0:
81
+ raise ValueError(f"Embedding dimension ({n_embd}) must be divisible by number of heads ({n_head}).")
82
+
83
  # 1. Load Dataset
84
  log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
85
  try:
 
257
  with gr.TabItem("3. Training Hyperparameters"):
258
  with gr.Row():
259
  epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
260
+ lr = gr.Number(label="Learning Rate", value=5e-4)
261
  with gr.Row():
262
  batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
263
  grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")