ImageStudio

Runtime error

nsfwalex Claude Opus 4.8 committed 6 days ago

Commit

a2d7364

1 Parent(s): 63b1a96

Switch assistant back to gemma-4-E4B (bf16)

VLM_MODEL_ID default -> prithivMLmods/gemma-4-E4B-it-Uncensored-MAX (fast,
~2s warm). Qwen 9B/4B + gemma 12B kept commented for revert. App reload.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

Files changed (1) hide show

app.py +8 -9

app.py CHANGED Viewed

@@ -152,16 +152,15 @@ print("Pipelines loaded!")
 # -----------------------------------------------------------------------------
 # Other models, kept for easy revert (set VLM_MODEL_ID, and VLM_LOAD_8BIT for big ones):
 #   "rodrigomt/Qwen3.5-4B-Uncensored-Aggressive"          # no generation_config; needed eos pinning
-#   "prithivMLmods/gemma-4-E4B-it-Uncensored-MAX"         # gemma4, small/fast bf16; under-rates explicit (~2)
 #   "OpenYourMind/gemma-4-12B-it-abliterated-uncensored"  # gemma4_unified, ~24GB; needs VLM_LOAD_8BIT=1 (slow)
-# Current: 9B uncensored Qwen3.5 VL (model_type=qwen3_5 / Qwen3_5ForConditionalGeneration).
-# Vision-capable (image-text-to-text), bf16 (~18.8 GB — heaviest that still fits the
-# zero-a10g alongside both diffusion pipelines, verified no OOM). Thinking model
-# (Reasoning On/Off toggle is meaningful). Its generation_config eos is just
-# <|endoftext|> and omits the chat terminator <|im_end|>; _resolve_vlm_eos_ids() unions
-# both so stopping still works. VLM_LOAD_8BIT=1 forces bitsandbytes 8-bit (only needed
-# for the 12B); default is bf16.
-VLM_MODEL_ID = os.environ.get("VLM_MODEL_ID", "ccharnkij/Qwen3.5-9B-Uncensored")
 VLM_LOAD_8BIT = os.environ.get("VLM_LOAD_8BIT", "0").lower() not in ("0", "false", "no", "")
 print(f"Loading assistant: {VLM_MODEL_ID} (8bit={VLM_LOAD_8BIT}) ...")

 # -----------------------------------------------------------------------------
 # Other models, kept for easy revert (set VLM_MODEL_ID, and VLM_LOAD_8BIT for big ones):
 #   "rodrigomt/Qwen3.5-4B-Uncensored-Aggressive"          # no generation_config; needed eos pinning
+#   "ccharnkij/Qwen3.5-9B-Uncensored"                     # 9B Qwen3.5 VL, ~18.8 GB bf16, thinking model
 #   "OpenYourMind/gemma-4-12B-it-abliterated-uncensored"  # gemma4_unified, ~24GB; needs VLM_LOAD_8BIT=1 (slow)
+# Current: gemma-4-E4B (model_type=gemma4 / Gemma4ForConditionalGeneration), multimodal,
+# uncensored. Small/fast — loads full bf16 (~8 GB, fits the zero-a10g alongside the
+# diffusion pipelines; warm calls ~2 s). Stop tokens are handled model-agnostically by
+# _resolve_vlm_eos_ids() (this model ships a proper generation_config). NOTE: under-rates
+# explicit content (~2 cap) — the known tradeoff for its speed. VLM_LOAD_8BIT=1 forces
+# bitsandbytes 8-bit (only needed for the 12B); default is bf16.
+VLM_MODEL_ID = os.environ.get("VLM_MODEL_ID", "prithivMLmods/gemma-4-E4B-it-Uncensored-MAX")
 VLM_LOAD_8BIT = os.environ.get("VLM_LOAD_8BIT", "0").lower() not in ("0", "false", "no", "")
 print(f"Loading assistant: {VLM_MODEL_ID} (8bit={VLM_LOAD_8BIT}) ...")