Upload train_autogrow.py with huggingface_hub

train_autogrow.py  CHANGED  (+55 -51)
@@ -12,10 +12,11 @@ from huggingface_hub import HfApi
 # --- FAILPROOF CONFIG ---
 MODEL_PATH = "./DiffReaper-Talk"
 REPO_ID = "darwinkernelpanic/DiffReaper-5"
-HF_TOKEN = "
+HF_TOKEN = os.getenv("HF_TOKEN")
 OUTPUT_DIR = "./training_output"
 LOG_FILE = "training.log"
-
+CHECKPOINT_LOG = "checkpoint_log.txt"
+BATCH_SIZE = 32
 LEARNING_RATE = 1e-4
 SAVE_EVERY = 2500
 TEST_EVERY = 500
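Moving the token out of source and into the environment is the main fix in this hunk. One caveat: `os.getenv("HF_TOKEN")` returns `None` when the variable is unset, and the script would only notice at the first checkpoint upload, 2500 steps in. A minimal fail-fast guard, sketched here as a suggestion rather than part of this commit:

```python
import os

# Suggested guard (not in the committed script): abort at startup if the
# token is missing, instead of failing at the first upload_file call.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is not set; launch with e.g. HF_TOKEN=hf_... python train_autogrow.py"
    )
```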
@@ -79,23 +80,7 @@ class DiffReaperModel(nn.Module):
         for block in self.blocks: x = block(x, t_emb)
         return self.ln_f(x)
 
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
-
-model = DiffReaperModel(tokenizer.vocab_size, N_EMBD, N_HEAD, N_LAYER).to("cuda")
-noise_scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule="squaredcos_cap_v2")
-optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
-
-log("Loading Dataset...")
-dataset = load_dataset("OpenAssistant/oasst1", split="train")
-def tokenize_function(examples):
-    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=TOTAL_LEN)
-tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
-tokenized_dataset.set_format("torch")
-dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=BATCH_SIZE, shuffle=True)
-
-def run_test(step):
+def run_test(model, tokenizer, step):
     log(f"Running Cropmark Diagnostic [Step {step}]...")
     model.eval()
     with torch.no_grad():
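This hunk changes `run_test` to take `model` and `tokenizer` explicitly instead of reading module-level globals; that is what lets all the setup code above move under the `if __name__ == "__main__":` guard in the next hunk. For reference, the unchanged lines at the top of the next hunk are the rounding step that turns predicted embeddings back into tokens: nearest neighbour by cosine similarity against the embedding table. Restated standalone, assuming the same tensor shapes as in the script:

```python
import torch
import torch.nn.functional as F

def embeddings_to_token_ids(pred_emb: torch.Tensor, emb_table: torch.Tensor) -> torch.Tensor:
    """Decode (batch, seq, n_embd) predicted embeddings to token ids by
    cosine similarity against the (vocab, n_embd) embedding table."""
    norm_pred = F.normalize(pred_emb, dim=-1)    # unit-length prediction vectors
    norm_table = F.normalize(emb_table, dim=-1)  # unit-length vocab vectors
    logits = norm_pred @ norm_table.T            # cosine similarity per vocab entry
    return logits.argmax(dim=-1)                 # (batch, seq) token ids
```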
@@ -113,37 +98,56 @@ def run_test(step):
         norm_r = F.normalize(r_noise, dim=-1)
         logits = torch.matmul(norm_r, norm_weights.T)
         resp_ids = torch.argmax(logits, dim=-1)
-
+        result = tokenizer.decode(resp_ids[0], skip_special_tokens=True)
+        log(f"Prompt: '{prompt}' | [Cropmark]: '{result}'")
+        with open(CHECKPOINT_LOG, "a") as f:
+            f.write(f"Step {step} - Prompt: '{prompt}' | [Cropmark]: '{result}'\n")
     model.train()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+if __name__ == "__main__":
+    log("Initializing Autogrow Model...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+    if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
+
+    model = DiffReaperModel(tokenizer.vocab_size, N_EMBD, N_HEAD, N_LAYER).to("cuda")
+    noise_scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule="squaredcos_cap_v2")
+    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
+
+    log("Loading Dataset...")
+    dataset = load_dataset("OpenAssistant/oasst1", split="train")
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=TOTAL_LEN)
+    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
+    tokenized_dataset.set_format("torch")
+    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=BATCH_SIZE, shuffle=True)
+
+    log("Autonomous growth starting...")
+    api = HfApi()
+    start_time = time.time()
+    step = 0
+    while True:
+        for batch in dataloader:
+            optimizer.zero_grad()
+            input_ids = batch["input_ids"].to("cuda")
+            prompt_emb = model.token_embedding(input_ids[:, :MAX_PROMPT_LEN])
+            resp_emb = model.token_embedding(input_ids[:, MAX_PROMPT_LEN:])
+            noise = torch.randn_like(resp_emb)
+            t = torch.randint(0, 1000, (input_ids.shape[0],), device="cuda").long()
+            noisy_resp = noise_scheduler.add_noise(resp_emb, noise, t)
+            pred_resp = model(torch.cat([prompt_emb, noisy_resp], dim=1), t)[:, MAX_PROMPT_LEN:, :]
+            loss = 1 - F.cosine_similarity(pred_resp, resp_emb, dim=-1).mean()
+            loss.backward()
+            optimizer.step()
+            if step % 100 == 0:
+                elapsed = time.time() - start_time
+                log(f"Step {step} - Loss: {loss.item():.6f} - Speed: {(step+1)/elapsed:.2f} s/s")
+            if step > 0 and step % TEST_EVERY == 0: run_test(model, tokenizer, step)
+            if step > 0 and step % SAVE_EVERY == 0:
+                ckpt_path = os.path.join(OUTPUT_DIR, f"cropmark_{step}.pt")
+                torch.save(model.state_dict(), ckpt_path)
+                log("Syncing to HF...")
+                try:
+                    api.upload_file(path_or_fileobj=ckpt_path, path_in_repo=f"cropmark_{step}.pt", repo_id=REPO_ID, token=HF_TOKEN)
+                    api.upload_file(path_or_fileobj="train_autogrow.py", path_in_repo="train_autogrow.py", repo_id=REPO_ID, token=HF_TOKEN)
+                except Exception as e: log(f"HF Sync Error: {e}")
+            step += 1
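The new `__main__` block is essentially the removed module-level script, indented under a guard, plus the training loop itself. The objective deserves a note: each fixed-length sequence is split at `MAX_PROMPT_LEN`, only the response half is noised by the DDPM scheduler, the model conditions on the clean prompt embeddings, and the loss is 1 minus the mean cosine similarity between predicted and clean response embeddings. One optimisation step, restated as a function under the same assumptions (module-level `model`, `noise_scheduler`, `optimizer`, and `MAX_PROMPT_LEN`, as in the script):

```python
import torch
import torch.nn.functional as F

def train_step(batch) -> float:
    """One diffusion training step over a padded [prompt | response] batch."""
    optimizer.zero_grad()
    input_ids = batch["input_ids"].to("cuda")
    # Prompt embeddings stay clean; response embeddings get diffused.
    prompt_emb = model.token_embedding(input_ids[:, :MAX_PROMPT_LEN])
    resp_emb = model.token_embedding(input_ids[:, MAX_PROMPT_LEN:])
    # One random timestep per example; the scheduler mixes in Gaussian noise.
    t = torch.randint(0, 1000, (input_ids.shape[0],), device="cuda")
    noisy_resp = noise_scheduler.add_noise(resp_emb, torch.randn_like(resp_emb), t)
    # The model sees [clean prompt | noisy response]; keep only the response slice.
    pred = model(torch.cat([prompt_emb, noisy_resp], dim=1), t)[:, MAX_PROMPT_LEN:, :]
    loss = 1 - F.cosine_similarity(pred, resp_emb, dim=-1).mean()
    loss.backward()
    optimizer.step()
    return loss.item()
```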
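One more operational detail: `torch.save` writes into `OUTPUT_DIR` but does not create it, and no `os.makedirs` appears in this diff. Assuming the unchanged part of the file does not create the directory either, the first save at step 2500 would raise `FileNotFoundError`; a one-liner before the loop covers it:

```python
import os

OUTPUT_DIR = "./training_output"  # same value as the script's config

# Suggested (not in the commit): torch.save does not create parent
# directories, so ensure the checkpoint directory exists up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)
```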