darwinkernelpanic committed on
Commit 36a9464 · verified · 1 Parent(s): 5553bc7

Upload train_autogrow.py with huggingface_hub

Files changed (1)
  1. train_autogrow.py +55 -51
train_autogrow.py CHANGED
@@ -12,10 +12,11 @@ from huggingface_hub import HfApi
 # --- FAILPROOF CONFIG ---
 MODEL_PATH = "./DiffReaper-Talk"
 REPO_ID = "darwinkernelpanic/DiffReaper-5"
-HF_TOKEN = "${HF_TOKEN}"
+HF_TOKEN = os.getenv("HF_TOKEN")
 OUTPUT_DIR = "./training_output"
 LOG_FILE = "training.log"
-BATCH_SIZE = 16 # Lower for 3090 VRAM
+CHECKPOINT_LOG = "checkpoint_log.txt"
+BATCH_SIZE = 32
 LEARNING_RATE = 1e-4
 SAVE_EVERY = 2500
 TEST_EVERY = 500
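
Note on the hunk above: it swaps a literal "${HF_TOKEN}" placeholder, which the Hub would reject as an invalid token, for an environment lookup. A minimal sketch of the same pattern with a fail-fast guard, assuming the script already imports os (it uses os.path.join later); the error message is illustrative:

import os

HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    # Fail at startup instead of passing token=None to the Hub API mid-run.
    raise RuntimeError("HF_TOKEN is not set; export it before launching training.")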
@@ -79,23 +80,7 @@ class DiffReaperModel(nn.Module):
         for block in self.blocks: x = block(x, t_emb)
         return self.ln_f(x)
 
-log("Initializing Autogrow Model...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
-
-model = DiffReaperModel(tokenizer.vocab_size, N_EMBD, N_HEAD, N_LAYER).to("cuda")
-noise_scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule="squaredcos_cap_v2")
-optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
-
-log("Loading Dataset...")
-dataset = load_dataset("OpenAssistant/oasst1", split="train")
-def tokenize_function(examples):
-    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=TOTAL_LEN)
-tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
-tokenized_dataset.set_format("torch")
-dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=BATCH_SIZE, shuffle=True)
-
-def run_test(step):
+def run_test(model, tokenizer, step):
     log(f"Running Cropmark Diagnostic [Step {step}]...")
     model.eval()
     with torch.no_grad():
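
Note on the hunk above: run_test now receives model and tokenizer as parameters instead of reading module-level globals. Its decode step rounds each predicted embedding to the vocabulary token with the highest cosine similarity; a self-contained sketch of that lookup, with illustrative names and shapes:

import torch
import torch.nn.functional as F

def nearest_tokens(pred_emb: torch.Tensor, emb_weight: torch.Tensor) -> torch.Tensor:
    # pred_emb:   (seq_len, n_embd) predicted response embeddings
    # emb_weight: (vocab_size, n_embd), e.g. model.token_embedding.weight
    norm_pred = F.normalize(pred_emb, dim=-1)
    norm_vocab = F.normalize(emb_weight, dim=-1)
    logits = norm_pred @ norm_vocab.T  # cosine similarity against every vocab row
    return logits.argmax(dim=-1)       # token ids, shape (seq_len,)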
@@ -113,37 +98,56 @@ def run_test(step):
         norm_r = F.normalize(r_noise, dim=-1)
         logits = torch.matmul(norm_r, norm_weights.T)
         resp_ids = torch.argmax(logits, dim=-1)
-        log(f"Prompt: '{prompt}' | [Cropmark]: '{tokenizer.decode(resp_ids[0], skip_special_tokens=True)}'")
+        result = tokenizer.decode(resp_ids[0], skip_special_tokens=True)
+        log(f"Prompt: '{prompt}' | [Cropmark]: '{result}'")
+        with open(CHECKPOINT_LOG, "a") as f:
+            f.write(f"Step {step} - Prompt: '{prompt}' | [Cropmark]: '{result}'\n")
     model.train()
 
-log("Autonomous growth starting...")
-api = HfApi()
-start_time = time.time()
-step = 0
-while True: # Unlimited steps, controlled by your credit
-    for batch in dataloader:
-        optimizer.zero_grad()
-        input_ids = batch["input_ids"].to("cuda")
-        prompt_emb = model.token_embedding(input_ids[:, :MAX_PROMPT_LEN])
-        resp_emb = model.token_embedding(input_ids[:, MAX_PROMPT_LEN:])
-
-        noise = torch.randn_like(resp_emb)
-        t = torch.randint(0, 1000, (input_ids.shape[0],), device="cuda").long()
-        noisy_resp = noise_scheduler.add_noise(resp_emb, noise, t)
-
-        pred_resp = model(torch.cat([prompt_emb, noisy_resp], dim=1), t)[:, MAX_PROMPT_LEN:, :]
-        loss = 1 - F.cosine_similarity(pred_resp, resp_emb, dim=-1).mean()
-        loss.backward()
-        optimizer.step()
-
-        if step % 100 == 0:
-            elapsed = time.time() - start_time
-            log(f"Step {step} - Loss: {loss.item():.6f} - Speed: {(step+1)/elapsed:.2f} s/s")
-        if step > 0 and step % TEST_EVERY == 0: run_test(step)
-        if step > 0 and step % SAVE_EVERY == 0:
-            ckpt_path = os.path.join(OUTPUT_DIR, f"cropmark_latest.pt")
-            torch.save(model.state_dict(), ckpt_path)
-            log("Syncing to HF...")
-            try: api.upload_file(path_or_fileobj=ckpt_path, path_in_repo="cropmark_latest.pt", repo_id=REPO_ID, token=HF_TOKEN)
-            except Exception as e: log(f"HF Sync Error: {e}")
-        step += 1
+if __name__ == "__main__":
+    log("Initializing Autogrow Model...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+    if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
+
+    model = DiffReaperModel(tokenizer.vocab_size, N_EMBD, N_HEAD, N_LAYER).to("cuda")
+    noise_scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule="squaredcos_cap_v2")
+    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
+
+    log("Loading Dataset...")
+    dataset = load_dataset("OpenAssistant/oasst1", split="train")
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=TOTAL_LEN)
+    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
+    tokenized_dataset.set_format("torch")
+    dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size=BATCH_SIZE, shuffle=True)
+
+    log("Autonomous growth starting...")
+    api = HfApi()
+    start_time = time.time()
+    step = 0
+    while True:
+        for batch in dataloader:
+            optimizer.zero_grad()
+            input_ids = batch["input_ids"].to("cuda")
+            prompt_emb = model.token_embedding(input_ids[:, :MAX_PROMPT_LEN])
+            resp_emb = model.token_embedding(input_ids[:, MAX_PROMPT_LEN:])
+            noise = torch.randn_like(resp_emb)
+            t = torch.randint(0, 1000, (input_ids.shape[0],), device="cuda").long()
+            noisy_resp = noise_scheduler.add_noise(resp_emb, noise, t)
+            pred_resp = model(torch.cat([prompt_emb, noisy_resp], dim=1), t)[:, MAX_PROMPT_LEN:, :]
+            loss = 1 - F.cosine_similarity(pred_resp, resp_emb, dim=-1).mean()
+            loss.backward()
+            optimizer.step()
+            if step % 100 == 0:
+                elapsed = time.time() - start_time
+                log(f"Step {step} - Loss: {loss.item():.6f} - Speed: {(step+1)/elapsed:.2f} s/s")
+            if step > 0 and step % TEST_EVERY == 0: run_test(model, tokenizer, step)
+            if step > 0 and step % SAVE_EVERY == 0:
+                ckpt_path = os.path.join(OUTPUT_DIR, f"cropmark_{step}.pt")
+                torch.save(model.state_dict(), ckpt_path)
+                log("Syncing to HF...")
+                try:
+                    api.upload_file(path_or_fileobj=ckpt_path, path_in_repo=f"cropmark_{step}.pt", repo_id=REPO_ID, token=HF_TOKEN)
+                    api.upload_file(path_or_fileobj="train_autogrow.py", path_in_repo="train_autogrow.py", repo_id=REPO_ID, token=HF_TOKEN)
+                except Exception as e: log(f"HF Sync Error: {e}")
+            step += 1
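
For reference, the objective the rewritten loop implements, isolated as one function: add DDPM noise to the response-token embeddings, predict them back conditioned on the clean prompt embeddings, and minimize cosine distance. Names mirror the script; train_step itself is a sketch, not part of the commit:

import torch
import torch.nn.functional as F

def train_step(model, optimizer, scheduler, input_ids, max_prompt_len):
    # One optimization step of the embedding-space diffusion objective.
    optimizer.zero_grad()
    prompt_emb = model.token_embedding(input_ids[:, :max_prompt_len])
    resp_emb = model.token_embedding(input_ids[:, max_prompt_len:])
    noise = torch.randn_like(resp_emb)
    # Sample one diffusion timestep per example, as the loop does with torch.randint.
    t = torch.randint(0, scheduler.config.num_train_timesteps,
                      (input_ids.shape[0],), device=input_ids.device)
    noisy_resp = scheduler.add_noise(resp_emb, noise, t)
    # The model sees [clean prompt | noisy response] and must recover the response.
    pred = model(torch.cat([prompt_emb, noisy_resp], dim=1), t)[:, max_prompt_len:, :]
    loss = 1 - F.cosine_similarity(pred, resp_emb, dim=-1).mean()
    loss.backward()
    optimizer.step()
    return loss.item()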
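
One remaining rough edge: the sync block gives up after a single attempt and only logs the error, so a transient network failure skips that checkpoint's upload entirely. A hedged alternative with retries and exponential backoff; sync_checkpoint is a hypothetical helper, not in the commit:

import os
import time
from huggingface_hub import HfApi

def sync_checkpoint(api: HfApi, ckpt_path: str, repo_id: str, token: str,
                    attempts: int = 3) -> bool:
    # Retry the upload a few times before giving up, doubling the wait each time.
    for attempt in range(attempts):
        try:
            api.upload_file(path_or_fileobj=ckpt_path,
                            path_in_repo=os.path.basename(ckpt_path),
                            repo_id=repo_id, token=token)
            return True
        except Exception as exc:
            print(f"HF sync attempt {attempt + 1}/{attempts} failed: {exc}")
            if attempt + 1 < attempts:
                time.sleep(2 ** attempt)
    return False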