Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,7 +30,7 @@ class LogQueueCallback(TrainerCallback):
|
|
| 30 |
log_str = f"Step {state.global_step}: {json.dumps(logs)}\n"
|
| 31 |
self.log_queue.put(log_str)
|
| 32 |
|
| 33 |
-
def
|
| 34 |
"""Retrieves the username from the HF token."""
|
| 35 |
if not token:
|
| 36 |
return None
|
|
@@ -59,22 +59,20 @@ def train_thread_target(
|
|
| 59 |
result_queue
|
| 60 |
):
|
| 61 |
"""
|
| 62 |
-
Background thread for training.
|
| 63 |
"""
|
| 64 |
try:
|
| 65 |
-
#
|
| 66 |
-
final_token = token
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
username = get_user_info(final_token)
|
| 71 |
if not username:
|
| 72 |
-
raise ValueError("Invalid Hugging Face Token.
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
log_queue.put(f"
|
| 78 |
|
| 79 |
# Validation for Transformer logic
|
| 80 |
if n_embd % n_head != 0:
|
|
@@ -83,6 +81,7 @@ def train_thread_target(
|
|
| 83 |
# 1. Load Dataset
|
| 84 |
log_queue.put(f"π Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
|
| 85 |
try:
|
|
|
|
| 86 |
dataset = load_dataset(dataset_id, split=f"train[:{int(sample_limit)}]")
|
| 87 |
except Exception as e:
|
| 88 |
raise ValueError(f"Error loading dataset: {e}")
|
|
@@ -128,7 +127,7 @@ def train_thread_target(
|
|
| 128 |
log_queue.put("ποΈ Starting Training Loop...\n")
|
| 129 |
|
| 130 |
training_args = TrainingArguments(
|
| 131 |
-
output_dir="./
|
| 132 |
overwrite_output_dir=True,
|
| 133 |
num_train_epochs=epochs,
|
| 134 |
per_device_train_batch_size=int(batch_size),
|
|
@@ -137,7 +136,7 @@ def train_thread_target(
|
|
| 137 |
weight_decay=weight_decay,
|
| 138 |
warmup_steps=int(warmup_steps),
|
| 139 |
logging_steps=10,
|
| 140 |
-
save_strategy="no",
|
| 141 |
push_to_hub=False,
|
| 142 |
report_to="none",
|
| 143 |
use_cpu=not torch.cuda.is_available(),
|
|
@@ -154,18 +153,18 @@ def train_thread_target(
|
|
| 154 |
|
| 155 |
trainer.train()
|
| 156 |
|
| 157 |
-
# 5. Push to Hub
|
| 158 |
-
log_queue.put(f"βοΈ
|
| 159 |
model.push_to_hub(full_repo_id, token=final_token)
|
| 160 |
tokenizer.push_to_hub(full_repo_id, token=final_token)
|
| 161 |
|
| 162 |
-
result_queue.put(f"π Success!
|
| 163 |
|
| 164 |
except Exception as e:
|
| 165 |
log_queue.put(f"β Error: {str(e)}\n")
|
| 166 |
result_queue.put(None)
|
| 167 |
|
| 168 |
-
# ---
|
| 169 |
|
| 170 |
def train_and_push_generator(
|
| 171 |
token, dataset_id, model_name,
|
|
@@ -173,11 +172,10 @@ def train_and_push_generator(
|
|
| 173 |
epochs, lr, weight_decay, warmup_steps,
|
| 174 |
batch_size, grad_accumulation, sample_limit
|
| 175 |
):
|
| 176 |
-
# If UI token is empty, we attempt to use the environment variable secret
|
| 177 |
effective_token = token or os.environ.get("HF_TOKEN")
|
| 178 |
|
| 179 |
if not effective_token:
|
| 180 |
-
yield "Error: No Hugging Face Token found.
|
| 181 |
return
|
| 182 |
|
| 183 |
log_queue = queue.Queue()
|
|
@@ -204,31 +202,31 @@ def train_and_push_generator(
|
|
| 204 |
|
| 205 |
if not result_queue.empty():
|
| 206 |
result = result_queue.get()
|
| 207 |
-
yield logs_history, result or "
|
| 208 |
else:
|
| 209 |
-
yield logs_history, "Process
|
| 210 |
|
| 211 |
# --- UI Layout ---
|
| 212 |
|
| 213 |
-
with gr.Blocks(theme=gr.themes.
|
| 214 |
-
gr.Markdown("#
|
| 215 |
-
gr.Markdown("Configure
|
| 216 |
|
| 217 |
with gr.Row():
|
| 218 |
hf_token = gr.Textbox(
|
| 219 |
-
label="
|
| 220 |
placeholder="hf_...",
|
| 221 |
type="password",
|
| 222 |
-
info="
|
| 223 |
)
|
| 224 |
model_name_input = gr.Textbox(
|
| 225 |
-
label="Model
|
| 226 |
-
value="my-
|
| 227 |
-
placeholder="e.g. tiny-
|
| 228 |
)
|
| 229 |
|
| 230 |
with gr.Tabs():
|
| 231 |
-
with gr.TabItem("1.
|
| 232 |
with gr.Row():
|
| 233 |
dataset_input = gr.Textbox(
|
| 234 |
label="Dataset ID",
|
|
@@ -236,40 +234,39 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray
|
|
| 236 |
placeholder="e.g. wikitext"
|
| 237 |
)
|
| 238 |
sample_limit = gr.Number(
|
| 239 |
-
label="
|
| 240 |
-
value=
|
| 241 |
-
precision=0
|
| 242 |
-
info="Number of rows to use for training"
|
| 243 |
)
|
| 244 |
context_length = gr.Slider(
|
| 245 |
minimum=64, maximum=1024, value=128, step=64,
|
| 246 |
-
label="Max Context Length
|
| 247 |
)
|
| 248 |
|
| 249 |
-
with gr.TabItem("2.
|
| 250 |
with gr.Row():
|
| 251 |
-
layers = gr.Slider(minimum=1, maximum=
|
| 252 |
-
embd = gr.Slider(minimum=64, maximum=1024, value=
|
| 253 |
with gr.Row():
|
| 254 |
-
heads = gr.Slider(minimum=2, maximum=16, value=
|
| 255 |
-
gr.Markdown("
|
| 256 |
|
| 257 |
-
with gr.TabItem("3. Training
|
| 258 |
with gr.Row():
|
| 259 |
-
epochs = gr.Slider(minimum=1, maximum=
|
| 260 |
lr = gr.Number(label="Learning Rate", value=5e-4)
|
| 261 |
with gr.Row():
|
| 262 |
-
batch_size = gr.Slider(minimum=1, maximum=
|
| 263 |
-
grad_accumulation = gr.Slider(minimum=1, maximum=
|
| 264 |
with gr.Row():
|
| 265 |
weight_decay = gr.Slider(minimum=0.0, maximum=0.1, value=0.01, step=0.01, label="Weight Decay")
|
| 266 |
-
warmup_steps = gr.Number(label="Warmup Steps", value=
|
| 267 |
|
| 268 |
-
train_btn = gr.Button("
|
| 269 |
|
| 270 |
with gr.Row():
|
| 271 |
-
log_output = gr.Code(label="
|
| 272 |
-
status_output = gr.Textbox(label="
|
| 273 |
|
| 274 |
train_btn.click(
|
| 275 |
fn=train_and_push_generator,
|
|
|
|
| 30 |
log_str = f"Step {state.global_step}: {json.dumps(logs)}\n"
|
| 31 |
self.log_queue.put(log_str)
|
| 32 |
|
| 33 |
+
def get_username(token):
|
| 34 |
"""Retrieves the username from the HF token."""
|
| 35 |
if not token:
|
| 36 |
return None
|
|
|
|
| 59 |
result_queue
|
| 60 |
):
|
| 61 |
"""
|
| 62 |
+
Background thread for training and pushing to user profile.
|
| 63 |
"""
|
| 64 |
try:
|
| 65 |
+
# 0. Auth & Identity
|
| 66 |
+
final_token = token or os.environ.get("HF_TOKEN")
|
| 67 |
+
username = get_username(final_token)
|
| 68 |
+
|
|
|
|
|
|
|
| 69 |
if not username:
|
| 70 |
+
raise ValueError("Invalid or missing Hugging Face Token. Ensure the token is provided or set as HF_TOKEN secret.")
|
| 71 |
|
| 72 |
+
# Target path is now the USER'S profile
|
| 73 |
+
full_repo_id = f"{username}/{model_name}"
|
| 74 |
+
log_queue.put(f"π Initializing for user: {username}\n")
|
| 75 |
+
log_queue.put(f"π¦ Target Repository: https://huggingface.co/{full_repo_id}\n")
|
| 76 |
|
| 77 |
# Validation for Transformer logic
|
| 78 |
if n_embd % n_head != 0:
|
|
|
|
| 81 |
# 1. Load Dataset
|
| 82 |
log_queue.put(f"π Loading dataset: {dataset_id} (Limit: {sample_limit})...\n")
|
| 83 |
try:
|
| 84 |
+
# We use the train split; user can specify limit
|
| 85 |
dataset = load_dataset(dataset_id, split=f"train[:{int(sample_limit)}]")
|
| 86 |
except Exception as e:
|
| 87 |
raise ValueError(f"Error loading dataset: {e}")
|
|
|
|
| 127 |
log_queue.put("ποΈ Starting Training Loop...\n")
|
| 128 |
|
| 129 |
training_args = TrainingArguments(
|
| 130 |
+
output_dir="./local_results",
|
| 131 |
overwrite_output_dir=True,
|
| 132 |
num_train_epochs=epochs,
|
| 133 |
per_device_train_batch_size=int(batch_size),
|
|
|
|
| 136 |
weight_decay=weight_decay,
|
| 137 |
warmup_steps=int(warmup_steps),
|
| 138 |
logging_steps=10,
|
| 139 |
+
save_strategy="no",
|
| 140 |
push_to_hub=False,
|
| 141 |
report_to="none",
|
| 142 |
use_cpu=not torch.cuda.is_available(),
|
|
|
|
| 153 |
|
| 154 |
trainer.train()
|
| 155 |
|
| 156 |
+
# 5. Push to User's Personal Hub
|
| 157 |
+
log_queue.put(f"βοΈ Uploading model to your profile...\n")
|
| 158 |
model.push_to_hub(full_repo_id, token=final_token)
|
| 159 |
tokenizer.push_to_hub(full_repo_id, token=final_token)
|
| 160 |
|
| 161 |
+
result_queue.put(f"π Success! Published to: https://huggingface.co/{full_repo_id}")
|
| 162 |
|
| 163 |
except Exception as e:
|
| 164 |
log_queue.put(f"β Error: {str(e)}\n")
|
| 165 |
result_queue.put(None)
|
| 166 |
|
| 167 |
+
# --- Generator for UI updates ---
|
| 168 |
|
| 169 |
def train_and_push_generator(
|
| 170 |
token, dataset_id, model_name,
|
|
|
|
| 172 |
epochs, lr, weight_decay, warmup_steps,
|
| 173 |
batch_size, grad_accumulation, sample_limit
|
| 174 |
):
|
|
|
|
| 175 |
effective_token = token or os.environ.get("HF_TOKEN")
|
| 176 |
|
| 177 |
if not effective_token:
|
| 178 |
+
yield "Error: No Hugging Face Token found. Please enter a 'Write' token below.", ""
|
| 179 |
return
|
| 180 |
|
| 181 |
log_queue = queue.Queue()
|
|
|
|
| 202 |
|
| 203 |
if not result_queue.empty():
|
| 204 |
result = result_queue.get()
|
| 205 |
+
yield logs_history, result or "Training failed. See logs."
|
| 206 |
else:
|
| 207 |
+
yield logs_history, "Process interrupted."
|
| 208 |
|
| 209 |
# --- UI Layout ---
|
| 210 |
|
| 211 |
+
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")) as demo:
|
| 212 |
+
gr.Markdown("# π Personal Auto-PreTrain")
|
| 213 |
+
gr.Markdown("Configure a custom GPT-2 architecture and train it directly to **your personal** Hugging Face profile.")
|
| 214 |
|
| 215 |
with gr.Row():
|
| 216 |
hf_token = gr.Textbox(
|
| 217 |
+
label="HF Write Token",
|
| 218 |
placeholder="hf_...",
|
| 219 |
type="password",
|
| 220 |
+
info="Required to create the repo on your profile. Must have 'Write' permissions."
|
| 221 |
)
|
| 222 |
model_name_input = gr.Textbox(
|
| 223 |
+
label="Model Name",
|
| 224 |
+
value="my-custom-gpt2",
|
| 225 |
+
placeholder="e.g. tiny-stories-v1"
|
| 226 |
)
|
| 227 |
|
| 228 |
with gr.Tabs():
|
| 229 |
+
with gr.TabItem("1. Data Selection"):
|
| 230 |
with gr.Row():
|
| 231 |
dataset_input = gr.Textbox(
|
| 232 |
label="Dataset ID",
|
|
|
|
| 234 |
placeholder="e.g. wikitext"
|
| 235 |
)
|
| 236 |
sample_limit = gr.Number(
|
| 237 |
+
label="Training Samples",
|
| 238 |
+
value=500,
|
| 239 |
+
precision=0
|
|
|
|
| 240 |
)
|
| 241 |
context_length = gr.Slider(
|
| 242 |
minimum=64, maximum=1024, value=128, step=64,
|
| 243 |
+
label="Max Context Length"
|
| 244 |
)
|
| 245 |
|
| 246 |
+
with gr.TabItem("2. Architecture"):
|
| 247 |
with gr.Row():
|
| 248 |
+
layers = gr.Slider(minimum=1, maximum=12, value=2, step=1, label="Layers")
|
| 249 |
+
embd = gr.Slider(minimum=64, maximum=1024, value=128, step=64, label="Embedding Dim")
|
| 250 |
with gr.Row():
|
| 251 |
+
heads = gr.Slider(minimum=2, maximum=16, value=4, step=2, label="Attention Heads")
|
| 252 |
+
gr.Markdown("_Note: Embedding Dim must be divisible by Attention Heads._")
|
| 253 |
|
| 254 |
+
with gr.TabItem("3. Training Settings"):
|
| 255 |
with gr.Row():
|
| 256 |
+
epochs = gr.Slider(minimum=1, maximum=20, value=1, step=1, label="Epochs")
|
| 257 |
lr = gr.Number(label="Learning Rate", value=5e-4)
|
| 258 |
with gr.Row():
|
| 259 |
+
batch_size = gr.Slider(minimum=1, maximum=32, value=4, step=1, label="Batch Size")
|
| 260 |
+
grad_accumulation = gr.Slider(minimum=1, maximum=16, value=1, step=1, label="Grad Accumulation")
|
| 261 |
with gr.Row():
|
| 262 |
weight_decay = gr.Slider(minimum=0.0, maximum=0.1, value=0.01, step=0.01, label="Weight Decay")
|
| 263 |
+
warmup_steps = gr.Number(label="Warmup Steps", value=50, precision=0)
|
| 264 |
|
| 265 |
+
train_btn = gr.Button("π₯ Start Training & Push to My Profile", variant="primary")
|
| 266 |
|
| 267 |
with gr.Row():
|
| 268 |
+
log_output = gr.Code(label="Training Progress", language="json", lines=12)
|
| 269 |
+
status_output = gr.Textbox(label="Final Status", interactive=False)
|
| 270 |
|
| 271 |
train_btn.click(
|
| 272 |
fn=train_and_push_generator,
|