Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -76,6 +76,10 @@ def train_thread_target(
|
|
| 76 |
full_repo_id = f"{org_name}/{model_name}"
|
| 77 |
log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# 1. Load Dataset
|
| 80 |
log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
|
| 81 |
try:
|
|
@@ -253,7 +257,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray
|
|
| 253 |
with gr.TabItem("3. Training Hyperparameters"):
|
| 254 |
with gr.Row():
|
| 255 |
epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
|
| 256 |
-
lr = gr.Number(label="Learning Rate", value=5e-4
|
| 257 |
with gr.Row():
|
| 258 |
batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
|
| 259 |
grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")
|
|
|
|
| 76 |
full_repo_id = f"{org_name}/{model_name}"
|
| 77 |
log_queue.put(f"🚀 Initializing for {full_repo_id}...\n")
|
| 78 |
|
| 79 |
+
# Validation for Transformer logic
|
| 80 |
+
if n_embd % n_head != 0:
|
| 81 |
+
raise ValueError(f"Embedding dimension ({n_embd}) must be divisible by number of heads ({n_head}).")
|
| 82 |
+
|
| 83 |
# 1. Load Dataset
|
| 84 |
log_queue.put(f"📚 Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
|
| 85 |
try:
|
|
|
|
| 257 |
with gr.TabItem("3. Training Hyperparameters"):
|
| 258 |
with gr.Row():
|
| 259 |
epochs = gr.Slider(minimum=1, maximum=50, value=1, step=1, label="Epochs")
|
| 260 |
+
lr = gr.Number(label="Learning Rate", value=5e-4)
|
| 261 |
with gr.Row():
|
| 262 |
batch_size = gr.Slider(minimum=1, maximum=64, value=8, step=1, label="Batch Size (per device)")
|
| 263 |
grad_accumulation = gr.Slider(minimum=1, maximum=32, value=1, step=1, label="Gradient Accumulation Steps")
|