obliteratus

Running on Zero

App Files Community

pliny-the-prompter committed on Mar 10

Commit

cd8d146

verified ·

1 Parent(s): b50f62f

Upload 132 files

Browse files

Files changed (3) hide show

app.py +53 -7
obliteratus/models/loader.py +16 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -135,6 +135,11 @@ def _load_model_to_device(
     model = AutoModelForCausalLM.from_pretrained(pretrained_path, **kwargs)
     # On MPS / CPU: model loaded without device_map, move to best device
     if not dev.supports_device_map_auto():
         target = dev.get_device()
@@ -243,9 +248,12 @@ def _recover_sessions_from_disk() -> None:
                         _obliterate_counter = idx + 1
                 except (ValueError, IndexError):
                     pass
-    # If we recovered sessions but _state has no output_dir, set it to the
-    # most recent checkpoint so chat_respond can reload from disk.
-    if found_any and not _state.get("output_dir"):
         with _lock:
             latest = _last_obliterated_label
             if latest and latest in _session_models:
@@ -854,6 +862,13 @@ def _cleanup_disk():
     # Clear session model cache (checkpoints are gone)
     _session_models.clear()
     # Also clear GPU
     _clear_gpu()
@@ -1968,6 +1983,7 @@ def obliterate(model_choice: str, method_choice: str,
                     on_stage=on_stage,
                     on_log=on_log,
                 )
                 pipeline_ref[0] = pipeline
                 pipeline.run_informed()
             else:
@@ -2023,6 +2039,7 @@ def obliterate(model_choice: str, method_choice: str,
                     cot_aware=adv_cot_aware,
                     n_sae_features=int(adv_n_sae_features),
                 )
                 pipeline_ref[0] = pipeline
                 pipeline.run()
         except Exception as e:
@@ -2047,8 +2064,8 @@ def obliterate(model_choice: str, method_choice: str,
     worker = threading.Thread(target=run_pipeline, daemon=True)
     worker.start()
-    # Stream log updates while pipeline runs (max 45 minutes to prevent indefinite hang)
-    _max_pipeline_secs = 45 * 60
     _pipeline_start = time.time()
     status_msg = "**Obliterating\u2026** (0s)"
     while worker.is_alive():
@@ -2059,7 +2076,7 @@ def obliterate(model_choice: str, method_choice: str,
         else:
             yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
         if time.time() - _pipeline_start > _max_pipeline_secs:
-            log_lines.append("\nTIMEOUT: Pipeline exceeded 45-minute limit.")
             break
         time.sleep(0.5)
@@ -2392,6 +2409,17 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
         if not checkpoint or not Path(checkpoint).exists():
             _recover_sessions_from_disk()
             checkpoint = _state.get("output_dir")
         if checkpoint and Path(checkpoint).exists():
             try:
                 is_preset = (_state.get("model_name") or "") in MODELS
@@ -2555,12 +2583,30 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
     global _skip_session_load
     if _skip_session_load > 0:
         _skip_session_load -= 1
-        if choice and _state.get("status") == "ready":
             yield (
                 f"**Ready!** `{choice}` is loaded — just type in the chat below.",
                 get_chat_header(),
             )
             return
     if not choice or choice not in _bench_configs:
         # On ZeroGPU, global state may be lost between process restarts.

     model = AutoModelForCausalLM.from_pretrained(pretrained_path, **kwargs)
+    # Compat: some custom model code (ChatGLM/GLM-4) accesses config.max_length
+    # which was removed from PretrainedConfig in newer transformers.
+    if not hasattr(model.config, "max_length"):
+        model.config.max_length = 20
     # On MPS / CPU: model loaded without device_map, move to best device
     if not dev.supports_device_map_auto():
         target = dev.get_device()
                         _obliterate_counter = idx + 1
                 except (ValueError, IndexError):
                     pass
+    # If we recovered sessions and _state has no valid output_dir, set it to
+    # the most recent checkpoint so chat_respond can reload from disk.
+    # Also overwrite a stale output_dir that points to a non-existent path.
+    _cur_dir = _state.get("output_dir")
+    _needs_update = not _cur_dir or not Path(_cur_dir).exists()
+    if found_any and _needs_update:
         with _lock:
             latest = _last_obliterated_label
             if latest and latest in _session_models:
     # Clear session model cache (checkpoints are gone)
     _session_models.clear()
+    # Clear stale output_dir reference (checkpoints were just deleted)
+    with _lock:
+        _state["output_dir"] = None
+        _state["model_name"] = None
+        _state["method"] = None
+        _state["status"] = "idle"
     # Also clear GPU
     _clear_gpu()
                     on_stage=on_stage,
                     on_log=on_log,
                 )
+                pipeline._bayesian_trials = int(adv_bayesian_trials)
                 pipeline_ref[0] = pipeline
                 pipeline.run_informed()
             else:
                     cot_aware=adv_cot_aware,
                     n_sae_features=int(adv_n_sae_features),
                 )
+                pipeline._bayesian_trials = int(adv_bayesian_trials)
                 pipeline_ref[0] = pipeline
                 pipeline.run()
         except Exception as e:
     worker = threading.Thread(target=run_pipeline, daemon=True)
     worker.start()
+    # Stream log updates while pipeline runs (max 400 hours for large-model Optuna optimization)
+    _max_pipeline_secs = 400 * 60 * 60
     _pipeline_start = time.time()
     status_msg = "**Obliterating\u2026** (0s)"
     while worker.is_alive():
         else:
             yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
         if time.time() - _pipeline_start > _max_pipeline_secs:
+            log_lines.append("\nTIMEOUT: Pipeline exceeded 400-hour limit.")
             break
         time.sleep(0.5)
         if not checkpoint or not Path(checkpoint).exists():
             _recover_sessions_from_disk()
             checkpoint = _state.get("output_dir")
+        # If output_dir is still stale, scan session models for any valid checkpoint
+        if not checkpoint or not Path(checkpoint).exists():
+            for _sm in _session_models.values():
+                _sm_dir = _sm.get("output_dir")
+                if _sm_dir and Path(_sm_dir).exists():
+                    checkpoint = _sm_dir
+                    with _lock:
+                        _state["output_dir"] = _sm_dir
+                        _state["model_name"] = _sm.get("model_choice")
+                        _state["method"] = _sm.get("method")
+                    break
         if checkpoint and Path(checkpoint).exists():
             try:
                 is_preset = (_state.get("model_name") or "") in MODELS
     global _skip_session_load
     if _skip_session_load > 0:
         _skip_session_load -= 1
+        # Verify the model is actually usable — not just that status says "ready".
+        # ZeroGPU can evict the model while status stays "ready", and the counter
+        # can get out of sync if only one dropdown .change fires instead of both.
+        with _lock:
+            _model_ok = (
+                _state.get("status") == "ready"
+                and _state.get("model") is not None
+                and _state.get("tokenizer") is not None
+            )
+        if choice and _model_ok:
+            # Double-check model tensors aren't stale (meta device)
+            try:
+                _dev = next(_state["model"].parameters()).device
+                if _dev.type == "meta":
+                    _model_ok = False
+            except Exception:
+                _model_ok = False
+        if choice and _model_ok:
             yield (
                 f"**Ready!** `{choice}` is loaded — just type in the chat below.",
                 get_chat_header(),
             )
             return
+        # Model is stale or evicted — fall through to normal loading path
     if not choice or choice not in _bench_configs:
         # On ZeroGPU, global state may be lost between process restarts.

obliteratus/models/loader.py CHANGED Viewed

@@ -465,6 +465,16 @@ def load_model(
             f"If this model requires custom code, pass trust_remote_code=True explicitly."
         ) from e
     # Memory estimation and warnings (skip for natively quantized models — estimate is wrong)
     native_quant = getattr(config, "quantization_config", None)
     est_gb = _estimate_model_memory_gb(config, torch_dtype) if native_quant is None else 0.0
@@ -629,6 +639,12 @@ def load_model(
     model.eval()
     # Free accelerator cache after loading
     dev.empty_cache()

             f"If this model requires custom code, pass trust_remote_code=True explicitly."
         ) from e
+    # ── Config compat: ensure generation-related attributes exist ──────
+    # Older PretrainedConfig had max_length (default 20) and other generation
+    # defaults.  Newer transformers moved them to GenerationConfig, but some
+    # custom model code (ChatGLM, GLM-4) still accesses config.max_length
+    # directly.  Patch them back so trust_remote_code models don't crash.
+    _gen_defaults = {"max_length": 20, "max_new_tokens": None}
+    for _attr, _default in _gen_defaults.items():
+        if not hasattr(config, _attr):
+            setattr(config, _attr, _default)
     # Memory estimation and warnings (skip for natively quantized models — estimate is wrong)
     native_quant = getattr(config, "quantization_config", None)
     est_gb = _estimate_model_memory_gb(config, torch_dtype) if native_quant is None else 0.0
     model.eval()
+    # Patch model.config with the same generation defaults (model.config may be
+    # a separate instance from the config we pre-patched above).
+    for _attr, _default in _gen_defaults.items():
+        if not hasattr(model.config, _attr):
+            setattr(model.config, _attr, _default)
     # Free accelerator cache after loading
     dev.empty_cache()

requirements.txt CHANGED Viewed

@@ -13,3 +13,4 @@ numpy>=1.24
 scikit-learn>=1.3
 tqdm>=4.64
 bitsandbytes>=0.46.1

 scikit-learn>=1.3
 tqdm>=4.64
 bitsandbytes>=0.46.1
+optuna>=3.0