APT-product

Sleeping

FlameF0X committed 25 days ago

Commit

9d443a8

verified ·

1 Parent(s): 9f30647

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,6 +76,10 @@ def train_thread_target(
         full_repo_id = f"{org_name}/{model_name}"
         log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
         # 1. Load Dataset
         log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
         try:
@@ -253,7 +257,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray
         with gr.TabItem("3. Training Hyperparameters"):
             with gr.Row():
                 epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
-                lr = gr.Number(label="Learning Rate", value=5e-4, format="%.1e")
             with gr.Row():
                 batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
                 grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")

         full_repo_id = f"{org_name}/{model_name}"
         log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
+        # Validation for Transformer logic
+        if n_embd % n_head != 0:
+            raise ValueError(f"Embedding dimension ({n_embd}) must be divisible by number of heads ({n_head}).")
         # 1. Load Dataset
         log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
         try:
         with gr.TabItem("3. Training Hyperparameters"):
             with gr.Row():
                 epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
+                lr = gr.Number(label="Learning Rate", value=5e-4)
             with gr.Row():
                 batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
                 grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")