Instructions to use HaadesX/Iconoclast with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use HaadesX/Iconoclast with Transformers:
```python
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("HaadesX/Iconoclast", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
# Large-N Evaluator - runs evaluation on a 520-prompt dataset to prove statistical significance
#
# For every run named in CHECKPOINTS, plus a derived "<base>-heretic"
# counterpart, that has a checkpoint directory containing a .jsonl file, this
# script:
#   1. stages a private copy of the project under $PERSIST_ROOT/job-stage,
#   2. points the Hugging Face / XDG caches at a per-run directory under
#      $PERSIST_ROOT/job-cache,
#   3. runs scripts/evaluate_large_dataset.py on that checkpoint, and
#   4. removes the staging and cache directories again.
# Results are written to $PERSIST_ROOT/large_evals/<run>_large_eval.json.
#
# Environment:
#   USER, HOME    - locate the persistent storage root and the project.
#   HF_TOKEN      - optional; passed through to the evaluator when set.
#   SLURM_JOB_ID  - set by Slurm; the shell PID is used when it is absent.
#
# NOTE(review): sbatch expects an interpreter line (e.g. #!/bin/bash) as the
# very first line of the file; none is visible in this excerpt - confirm.
#SBATCH --job-name=eval-large
#SBATCH --output=logs/eval-large-%j.out
#SBATCH --error=logs/eval-large-%j.err
#SBATCH --time=24:00:00
#SBATCH --mem=64G
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=4

set -euo pipefail

PERSIST_ROOT="/common/users/$USER/iconoclast_ilabs"
SITE_PACKAGES="$PERSIST_ROOT/python312-site"
SYS_PY="/common/system/venv/python312/bin/python"
PROJECT_ROOT="$HOME/iconoclast"

# Suffix that keeps staging/cache directories unique per job. Falling back to
# the shell PID lets the script run outside Slurm, where SLURM_JOB_ID is unset
# and a bare expansion would abort under `set -u`.
JOB_TAG="${SLURM_JOB_ID:-$$}"

# List of all completed studies we want to do a large-N evaluation on.
# Each key names a directory under $PERSIST_ROOT/checkpoints that is expected
# to contain the run's .jsonl checkpoint file.
CHECKPOINTS=(
  "qwen3-1p7b-rutgers-paper-directness"
  "qwen2-5-3b-rutgers-benchmark"
  "qwen3-4b-rutgers-benchmark-v2"
  "phi35-mini-rutgers-nullspace-benchmark-v3"
  "llama3-1-8b-rutgers-benchmark"
  "smollm2-1p7b-rutgers-benchmark"
  "gemma2-2b-seq"
  "mistral-7b-seq"
  "phi4-mini-seq"
  "stablelm2-1p6b-seq"
  "yi-1p5-9b-seq"
  "falcon3-7b-seq"
  "olmo2-1b-seq"
)

# And their HERETIC counterparts.
# Initialised explicitly so a value inherited from the submitting environment
# cannot end up as an extra element.
HERETIC_CHECKPOINTS=()
for cp in "${CHECKPOINTS[@]}"; do
  # Extract the base model name part from the run name: everything from the
  # first "-rutgers", "-seq", "-benchmark" or "-paper" onwards is dropped.
  base_name=$(sed -E 's/-(rutgers|seq|benchmark|paper).*//' <<<"$cp")
  HERETIC_CHECKPOINTS+=("${base_name}-heretic")
done
ALL_CHECKPOINTS=("${CHECKPOINTS[@]}" "${HERETIC_CHECKPOINTS[@]}")

# Staging/cache directories of the run currently in progress (empty when no
# run is staged). Tracked globally so the EXIT trap can remove them.
JOB_ROOT=""
CACHE_ROOT=""

#######################################
# Remove the staging and cache directories of the current run, if any.
# Globals:   JOB_ROOT, CACHE_ROOT (read, then reset to empty)
# Arguments: none
#######################################
cleanup_stage() {
  if [[ -n "$JOB_ROOT" ]]; then
    rm -rf -- "$JOB_ROOT"
  fi
  if [[ -n "$CACHE_ROOT" ]]; then
    rm -rf -- "$CACHE_ROOT"
  fi
  JOB_ROOT=""
  CACHE_ROOT=""
}
# Without this, an abort under `set -e` (e.g. a failed rsync) would leave the
# staged project copy and model cache behind on persistent storage.
trap cleanup_stage EXIT

cd "$PROJECT_ROOT"
mkdir -p logs
mkdir -p "$PERSIST_ROOT/large_evals"

# Settings that do not depend on the run being evaluated.
export HF_HUB_ENABLE_HF_TRANSFER=1
export PYTHONUNBUFFERED=1
export TOKENIZERS_PARALLELISM=false
export USE_TF=0
export USE_FLAX=0

# Take the Hugging Face token from the environment instead of hard-coding it;
# a placeholder value would override a real token supplied by the caller.
if [[ -n "${HF_TOKEN:-}" ]]; then
  export HF_TOKEN
else
  echo "WARNING: HF_TOKEN is not set; downloads that require authentication will fail" >&2
fi

failed_runs=()

for run_name in "${ALL_CHECKPOINTS[@]}"; do
  checkpoint_dir="$PERSIST_ROOT/checkpoints/$run_name"

  # The .jsonl file name is generated by replacing non-alnum with '--'.
  # We can just find it in the dir.
  if [[ ! -d "$checkpoint_dir" ]]; then
    echo "Skipping $run_name (not finished yet)"
    continue
  fi

  # Sort the matches so the chosen file is deterministic when more than one
  # exists; reading into an array also avoids a `find | head` pipeline, which
  # can fail under pipefail when head closes the pipe early.
  mapfile -t jsonl_candidates < <(
    find "$checkpoint_dir" -name '*.jsonl' | LC_ALL=C sort
  )
  if (( ${#jsonl_candidates[@]} == 0 )); then
    echo "Skipping $run_name (no .jsonl found)"
    continue
  fi
  jsonl_file="${jsonl_candidates[0]}"

  echo "============================================================"
  echo " STARTING LARGE EVAL: $run_name"
  echo " CHECKPOINT: $jsonl_file"
  echo "============================================================"

  # Set up per-run staging and cache
  JOB_ROOT="$PERSIST_ROOT/job-stage/eval-$run_name-$JOB_TAG"
  CACHE_ROOT="$PERSIST_ROOT/job-cache/eval-$run_name-$JOB_TAG"
  rm -rf -- "$JOB_ROOT" "$CACHE_ROOT"
  mkdir -p "$JOB_ROOT"
  mkdir -p "$CACHE_ROOT"/{hf,hub,transformers,datasets,xdg-cache,xdg-state}

  # Stage the project
  rsync -a \
    --exclude '.venv' \
    --exclude '__pycache__' \
    --exclude 'logs' \
    --exclude '.pytest_cache' \
    "$PROJECT_ROOT"/ "$JOB_ROOT"/

  cd "$JOB_ROOT"

  # Import the staged sources first, then the shared site-packages.
  export PYTHONPATH="$JOB_ROOT/src:$SITE_PACKAGES"
  # Keep every cache inside the per-run directory so it is removed with it.
  export XDG_CACHE_HOME="$CACHE_ROOT/xdg-cache"
  export XDG_STATE_HOME="$CACHE_ROOT/xdg-state"
  export HF_HOME="$CACHE_ROOT/hf"
  export HF_DATASETS_CACHE="$CACHE_ROOT/datasets"
  export TRANSFORMERS_CACHE="$CACHE_ROOT/transformers"
  export HUGGINGFACE_HUB_CACHE="$CACHE_ROOT/hub"

  output_file="$PERSIST_ROOT/large_evals/${run_name}_large_eval.json"

  # Run the large evaluator. A failure is reported and recorded, but the
  # remaining runs are still evaluated.
  if ! "$SYS_PY" scripts/evaluate_large_dataset.py \
    --checkpoint "$jsonl_file" \
    --dataset "mlabonne/harmful_behaviors" \
    --split "train+test" \
    --column "text" \
    --output "$output_file"; then
    echo " FAILED EVAL for $run_name"
    failed_runs+=("$run_name")
  fi

  # Clean up
  cd "$PROJECT_ROOT"
  cleanup_stage
  echo " Done with $run_name"
done

# Summarise failures on stderr; the exit status is deliberately left unchanged.
if (( ${#failed_runs[@]} > 0 )); then
  printf 'Failed runs (%d):\n' "${#failed_runs[@]}" >&2
  printf '  %s\n' "${failed_runs[@]}" >&2
fi

echo "ALL LARGE EVALUATIONS COMPLETE."