Ksjsjjdj committed on
Commit
dbb0ed0
·
verified ·
1 Parent(s): 499721a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -6,6 +6,7 @@ import uuid
6
  import time
7
  import sys
8
  import gc
 
9
  from datetime import datetime
10
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
  from itertools import chain
@@ -36,6 +37,7 @@ logging.basicConfig(level=logging.CRITICAL, stream=sys.stderr)
36
  if torch.cuda.is_available():
37
  torch.backends.cuda.matmul.allow_tf32 = True
38
  torch.backends.cudnn.allow_tf32 = True
 
39
 
40
  JOBS = {}
41
 
@@ -107,7 +109,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
107
 
108
  job = JOBS[job_id]
109
  job.status = "RUNNING"
110
- job.add_log("System: Starting Neural Forge Engine...")
111
 
112
  try:
113
  if not hf_token.startswith("hf_"):
@@ -116,6 +118,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
116
  os.environ["WANDB_DISABLED"] = "true"
117
  os.environ["HF_TOKEN"] = hf_token
118
  os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
 
119
 
120
  login(token=hf_token)
121
  try:
@@ -151,9 +154,10 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
151
  return None
152
 
153
  streams = []
154
- job.set_progress(0.05, "Data: Connecting streams...")
155
 
156
- with ThreadPoolExecutor(max_workers=4) as executor:
 
157
  futures = []
158
  for ds_name in dataset_list:
159
  futures.append(executor.submit(load_single, ds_name, None))
@@ -166,7 +170,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
166
  if not streams:
167
  raise Exception("No valid datasets found")
168
 
169
- job.set_progress(0.1, f"Data: {len(streams)} sources active.")
170
 
171
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left", add_eos_token=True, add_bos_token=True)
172
  if tokenizer.pad_token is None:
@@ -180,16 +184,15 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
180
  text = str(item.get("text", item.get("content", str(item))))
181
  if len(text) < 10: continue
182
  batch_buffer.append(text)
183
- if len(batch_buffer) >= 20:
184
- for txt in batch_buffer:
185
- tokens = tokenizer(txt, truncation=True, max_length=1024)
186
- tokens["labels"] = tokens["input_ids"].copy()
187
- yield tokens
188
  batch_buffer = []
189
  except:
190
  continue
191
 
192
- job.set_progress(0.15, "Model: Loading weights...")
193
 
194
  torch.cuda.empty_cache()
195
  gc.collect()
@@ -198,7 +201,8 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
198
  model_name,
199
  trust_remote_code=True,
200
  device_map="auto",
201
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 
202
  )
203
 
204
  peft_config = LoraConfig(
@@ -221,14 +225,17 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
221
  gradient_accumulation_steps=4,
222
  max_steps=int(train_steps),
223
  learning_rate=learning_rate,
224
- optim="adamw_torch",
225
  logging_steps=1,
226
  save_strategy="steps",
227
  save_steps=max(10, int(int(train_steps)/5)),
228
  save_total_limit=2,
229
  report_to="none",
230
  fp16=True if torch.cuda.is_available() else False,
231
- disable_tqdm=True
 
 
 
232
  )
233
 
234
  dataset_iterable = IterableDataset.from_generator(process_stream_generator)
@@ -240,7 +247,7 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
240
  callbacks=[CustomTrainerCallback(job_id, hf_token, full_repo_id)]
241
  )
242
 
243
- job.set_progress(0.2, "Training: Phase initiated...")
244
  trainer.train()
245
 
246
  job.set_progress(0.9, "Processing: Merging tensors...")
@@ -254,7 +261,8 @@ def background_train_task(job_id, hf_token, model_name, new_repo_name, lora_r, l
254
  return_dict=True,
255
  torch_dtype=torch.float16,
256
  trust_remote_code=True,
257
- device_map="auto"
 
258
  )
259
 
260
  model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
@@ -337,7 +345,7 @@ def load_from_url(request: gr.Request):
337
  pass
338
  return gr.update(selected="launch_tab"), ""
339
 
340
- with gr.Blocks(title="Nucleus Enterprise") as demo:
341
  with gr.Column():
342
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
343
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")
 
6
  import time
7
  import sys
8
  import gc
9
+ import multiprocessing
10
  from datetime import datetime
11
  from concurrent.futures import ThreadPoolExecutor, as_completed
12
  from itertools import chain
 
37
  if torch.cuda.is_available():
38
  torch.backends.cuda.matmul.allow_tf32 = True
39
  torch.backends.cudnn.allow_tf32 = True
40
+ torch.backends.cudnn.benchmark = True
41
 
42
  JOBS = {}
43
 
 
109
 
110
  job = JOBS[job_id]
111
  job.status = "RUNNING"
112
+ job.add_log("System: Starting High-Performance Neural Engine...")
113
 
114
  try:
115
  if not hf_token.startswith("hf_"):
 
118
  os.environ["WANDB_DISABLED"] = "true"
119
  os.environ["HF_TOKEN"] = hf_token
120
  os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
121
+ os.environ["TOKENIZERS_PARALLELISM"] = "true"
122
 
123
  login(token=hf_token)
124
  try:
 
154
  return None
155
 
156
  streams = []
157
+ job.set_progress(0.05, "Data: Connecting streams (Multi-threaded)...")
158
 
159
+ cpu_count = multiprocessing.cpu_count()
160
+ with ThreadPoolExecutor(max_workers=cpu_count * 2) as executor:
161
  futures = []
162
  for ds_name in dataset_list:
163
  futures.append(executor.submit(load_single, ds_name, None))
 
170
  if not streams:
171
  raise Exception("No valid datasets found")
172
 
173
+ job.set_progress(0.1, f"Data: {len(streams)} high-speed sources active.")
174
 
175
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left", add_eos_token=True, add_bos_token=True)
176
  if tokenizer.pad_token is None:
 
184
  text = str(item.get("text", item.get("content", str(item))))
185
  if len(text) < 10: continue
186
  batch_buffer.append(text)
187
+ if len(batch_buffer) >= 100:
188
+ encoded_batch = tokenizer(batch_buffer, truncation=True, max_length=2048, padding=False)
189
+ for input_ids in encoded_batch["input_ids"]:
190
+ yield {"input_ids": input_ids, "labels": input_ids}
 
191
  batch_buffer = []
192
  except:
193
  continue
194
 
195
+ job.set_progress(0.15, "Model: Loading weights (Fast IO)...")
196
 
197
  torch.cuda.empty_cache()
198
  gc.collect()
 
201
  model_name,
202
  trust_remote_code=True,
203
  device_map="auto",
204
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
205
+ low_cpu_mem_usage=True
206
  )
207
 
208
  peft_config = LoraConfig(
 
225
  gradient_accumulation_steps=4,
226
  max_steps=int(train_steps),
227
  learning_rate=learning_rate,
228
+ optim="adamw_torch_fused" if torch.cuda.is_available() else "adamw_torch",
229
  logging_steps=1,
230
  save_strategy="steps",
231
  save_steps=max(10, int(int(train_steps)/5)),
232
  save_total_limit=2,
233
  report_to="none",
234
  fp16=True if torch.cuda.is_available() else False,
235
+ disable_tqdm=True,
236
+ dataloader_num_workers=4,
237
+ dataloader_pin_memory=True,
238
+ torch_compile=True if os.name == 'posix' else False
239
  )
240
 
241
  dataset_iterable = IterableDataset.from_generator(process_stream_generator)
 
247
  callbacks=[CustomTrainerCallback(job_id, hf_token, full_repo_id)]
248
  )
249
 
250
+ job.set_progress(0.2, "Training: Accelerated Phase initiated...")
251
  trainer.train()
252
 
253
  job.set_progress(0.9, "Processing: Merging tensors...")
 
261
  return_dict=True,
262
  torch_dtype=torch.float16,
263
  trust_remote_code=True,
264
+ device_map="auto",
265
+ low_cpu_mem_usage=True
266
  )
267
 
268
  model_to_merge = PeftModel.from_pretrained(base_reload, output_dir)
 
345
  pass
346
  return gr.update(selected="launch_tab"), ""
347
 
348
+ with gr.Blocks(title="Nucleus Enterprise", theme=gr.themes.Base()) as demo:
349
  with gr.Column():
350
  gr.Markdown("# ⚛️ NUCLEUS ENTERPRISE")
351
  gr.Markdown("Autonomous LLM Foundry | V5.0 Stable")