pliny-the-prompter committed on
Commit
cd8d146
·
verified ·
1 Parent(s): b50f62f

Upload 132 files

Browse files
Files changed (3) hide show
  1. app.py +53 -7
  2. obliteratus/models/loader.py +16 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -135,6 +135,11 @@ def _load_model_to_device(
135
 
136
  model = AutoModelForCausalLM.from_pretrained(pretrained_path, **kwargs)
137
 
 
 
 
 
 
138
  # On MPS / CPU: model loaded without device_map, move to best device
139
  if not dev.supports_device_map_auto():
140
  target = dev.get_device()
@@ -243,9 +248,12 @@ def _recover_sessions_from_disk() -> None:
243
  _obliterate_counter = idx + 1
244
  except (ValueError, IndexError):
245
  pass
246
- # If we recovered sessions but _state has no output_dir, set it to the
247
- # most recent checkpoint so chat_respond can reload from disk.
248
- if found_any and not _state.get("output_dir"):
 
 
 
249
  with _lock:
250
  latest = _last_obliterated_label
251
  if latest and latest in _session_models:
@@ -854,6 +862,13 @@ def _cleanup_disk():
854
  # Clear session model cache (checkpoints are gone)
855
  _session_models.clear()
856
 
 
 
 
 
 
 
 
857
  # Also clear GPU
858
  _clear_gpu()
859
 
@@ -1968,6 +1983,7 @@ def obliterate(model_choice: str, method_choice: str,
1968
  on_stage=on_stage,
1969
  on_log=on_log,
1970
  )
 
1971
  pipeline_ref[0] = pipeline
1972
  pipeline.run_informed()
1973
  else:
@@ -2023,6 +2039,7 @@ def obliterate(model_choice: str, method_choice: str,
2023
  cot_aware=adv_cot_aware,
2024
  n_sae_features=int(adv_n_sae_features),
2025
  )
 
2026
  pipeline_ref[0] = pipeline
2027
  pipeline.run()
2028
  except Exception as e:
@@ -2047,8 +2064,8 @@ def obliterate(model_choice: str, method_choice: str,
2047
  worker = threading.Thread(target=run_pipeline, daemon=True)
2048
  worker.start()
2049
 
2050
- # Stream log updates while pipeline runs (max 45 minutes to prevent indefinite hang)
2051
- _max_pipeline_secs = 45 * 60
2052
  _pipeline_start = time.time()
2053
  status_msg = "**Obliterating\u2026** (0s)"
2054
  while worker.is_alive():
@@ -2059,7 +2076,7 @@ def obliterate(model_choice: str, method_choice: str,
2059
  else:
2060
  yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
2061
  if time.time() - _pipeline_start > _max_pipeline_secs:
2062
- log_lines.append("\nTIMEOUT: Pipeline exceeded 45-minute limit.")
2063
  break
2064
  time.sleep(0.5)
2065
 
@@ -2392,6 +2409,17 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
2392
  if not checkpoint or not Path(checkpoint).exists():
2393
  _recover_sessions_from_disk()
2394
  checkpoint = _state.get("output_dir")
 
 
 
 
 
 
 
 
 
 
 
2395
  if checkpoint and Path(checkpoint).exists():
2396
  try:
2397
  is_preset = (_state.get("model_name") or "") in MODELS
@@ -2555,12 +2583,30 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
2555
  global _skip_session_load
2556
  if _skip_session_load > 0:
2557
  _skip_session_load -= 1
2558
- if choice and _state.get("status") == "ready":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2559
  yield (
2560
 f"**Ready!** `{choice}` is loaded — just type in the chat below.",
2561
  get_chat_header(),
2562
  )
2563
  return
 
2564
 
2565
  if not choice or choice not in _bench_configs:
2566
  # On ZeroGPU, global state may be lost between process restarts.
 
135
 
136
  model = AutoModelForCausalLM.from_pretrained(pretrained_path, **kwargs)
137
 
138
+ # Compat: some custom model code (ChatGLM/GLM-4) accesses config.max_length
139
+ # which was removed from PretrainedConfig in newer transformers.
140
+ if not hasattr(model.config, "max_length"):
141
+ model.config.max_length = 20
142
+
143
  # On MPS / CPU: model loaded without device_map, move to best device
144
  if not dev.supports_device_map_auto():
145
  target = dev.get_device()
 
248
  _obliterate_counter = idx + 1
249
  except (ValueError, IndexError):
250
  pass
251
+ # If we recovered sessions and _state has no valid output_dir, set it to
252
+ # the most recent checkpoint so chat_respond can reload from disk.
253
+ # Also overwrite a stale output_dir that points to a non-existent path.
254
+ _cur_dir = _state.get("output_dir")
255
+ _needs_update = not _cur_dir or not Path(_cur_dir).exists()
256
+ if found_any and _needs_update:
257
  with _lock:
258
  latest = _last_obliterated_label
259
  if latest and latest in _session_models:
 
862
  # Clear session model cache (checkpoints are gone)
863
  _session_models.clear()
864
 
865
+ # Clear stale output_dir reference (checkpoints were just deleted)
866
+ with _lock:
867
+ _state["output_dir"] = None
868
+ _state["model_name"] = None
869
+ _state["method"] = None
870
+ _state["status"] = "idle"
871
+
872
  # Also clear GPU
873
  _clear_gpu()
874
 
 
1983
  on_stage=on_stage,
1984
  on_log=on_log,
1985
  )
1986
+ pipeline._bayesian_trials = int(adv_bayesian_trials)
1987
  pipeline_ref[0] = pipeline
1988
  pipeline.run_informed()
1989
  else:
 
2039
  cot_aware=adv_cot_aware,
2040
  n_sae_features=int(adv_n_sae_features),
2041
  )
2042
+ pipeline._bayesian_trials = int(adv_bayesian_trials)
2043
  pipeline_ref[0] = pipeline
2044
  pipeline.run()
2045
  except Exception as e:
 
2064
  worker = threading.Thread(target=run_pipeline, daemon=True)
2065
  worker.start()
2066
 
2067
+ # Stream log updates while pipeline runs (max 400 hours for large-model Optuna optimization)
2068
+ _max_pipeline_secs = 400 * 60 * 60
2069
  _pipeline_start = time.time()
2070
  status_msg = "**Obliterating\u2026** (0s)"
2071
  while worker.is_alive():
 
2076
  else:
2077
  yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
2078
  if time.time() - _pipeline_start > _max_pipeline_secs:
2079
+ log_lines.append("\nTIMEOUT: Pipeline exceeded 400-hour limit.")
2080
  break
2081
  time.sleep(0.5)
2082
 
 
2409
  if not checkpoint or not Path(checkpoint).exists():
2410
  _recover_sessions_from_disk()
2411
  checkpoint = _state.get("output_dir")
2412
+ # If output_dir is still stale, scan session models for any valid checkpoint
2413
+ if not checkpoint or not Path(checkpoint).exists():
2414
+ for _sm in _session_models.values():
2415
+ _sm_dir = _sm.get("output_dir")
2416
+ if _sm_dir and Path(_sm_dir).exists():
2417
+ checkpoint = _sm_dir
2418
+ with _lock:
2419
+ _state["output_dir"] = _sm_dir
2420
+ _state["model_name"] = _sm.get("model_choice")
2421
+ _state["method"] = _sm.get("method")
2422
+ break
2423
  if checkpoint and Path(checkpoint).exists():
2424
  try:
2425
  is_preset = (_state.get("model_name") or "") in MODELS
 
2583
  global _skip_session_load
2584
  if _skip_session_load > 0:
2585
  _skip_session_load -= 1
2586
+ # Verify the model is actually usable — not just that status says "ready".
2587
+ # ZeroGPU can evict the model while status stays "ready", and the counter
2588
+ # can get out of sync if only one dropdown .change fires instead of both.
2589
+ with _lock:
2590
+ _model_ok = (
2591
+ _state.get("status") == "ready"
2592
+ and _state.get("model") is not None
2593
+ and _state.get("tokenizer") is not None
2594
+ )
2595
+ if choice and _model_ok:
2596
+ # Double-check model tensors aren't stale (meta device)
2597
+ try:
2598
+ _dev = next(_state["model"].parameters()).device
2599
+ if _dev.type == "meta":
2600
+ _model_ok = False
2601
+ except Exception:
2602
+ _model_ok = False
2603
+ if choice and _model_ok:
2604
  yield (
2605
 f"**Ready!** `{choice}` is loaded — just type in the chat below.",
2606
  get_chat_header(),
2607
  )
2608
  return
2609
+ # Model is stale or evicted — fall through to normal loading path
2610
 
2611
  if not choice or choice not in _bench_configs:
2612
  # On ZeroGPU, global state may be lost between process restarts.
obliteratus/models/loader.py CHANGED
@@ -465,6 +465,16 @@ def load_model(
465
  f"If this model requires custom code, pass trust_remote_code=True explicitly."
466
  ) from e
467
 
 
 
 
 
 
 
 
 
 
 
468
 # Memory estimation and warnings (skip for natively quantized models — estimate is wrong)
469
  native_quant = getattr(config, "quantization_config", None)
470
  est_gb = _estimate_model_memory_gb(config, torch_dtype) if native_quant is None else 0.0
@@ -629,6 +639,12 @@ def load_model(
629
 
630
  model.eval()
631
 
 
 
 
 
 
 
632
  # Free accelerator cache after loading
633
  dev.empty_cache()
634
 
 
465
  f"If this model requires custom code, pass trust_remote_code=True explicitly."
466
  ) from e
467
 
468
+ # ── Config compat: ensure generation-related attributes exist ──────
469
+ # Older PretrainedConfig had max_length (default 20) and other generation
470
+ # defaults. Newer transformers moved them to GenerationConfig, but some
471
+ # custom model code (ChatGLM, GLM-4) still accesses config.max_length
472
+ # directly. Patch them back so trust_remote_code models don't crash.
473
+ _gen_defaults = {"max_length": 20, "max_new_tokens": None}
474
+ for _attr, _default in _gen_defaults.items():
475
+ if not hasattr(config, _attr):
476
+ setattr(config, _attr, _default)
477
+
478
 # Memory estimation and warnings (skip for natively quantized models — estimate is wrong)
479
  native_quant = getattr(config, "quantization_config", None)
480
  est_gb = _estimate_model_memory_gb(config, torch_dtype) if native_quant is None else 0.0
 
639
 
640
  model.eval()
641
 
642
+ # Patch model.config with the same generation defaults (model.config may be
643
+ # a separate instance from the config we pre-patched above).
644
+ for _attr, _default in _gen_defaults.items():
645
+ if not hasattr(model.config, _attr):
646
+ setattr(model.config, _attr, _default)
647
+
648
  # Free accelerator cache after loading
649
  dev.empty_cache()
650
 
requirements.txt CHANGED
@@ -13,3 +13,4 @@ numpy>=1.24
13
  scikit-learn>=1.3
14
  tqdm>=4.64
15
  bitsandbytes>=0.46.1
 
 
13
  scikit-learn>=1.3
14
  tqdm>=4.64
15
  bitsandbytes>=0.46.1
16
+ optuna>=3.0