Image-Text-to-Text
PEFT
Safetensors
Korean
lora
vision
image-classification
vision-language
korean
pest-detection
agriculture
qwen
qwen3.5
unsloth
multimodal
conversational
Eval Results (legacy)
Instructions to use pfox1995/pest-detector-deploy with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use pfox1995/pest-detector-deploy with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/Qwen3.5-9B")
model = PeftModel.from_pretrained(base_model, "pfox1995/pest-detector-deploy")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio new
How to use pfox1995/pest-detector-deploy with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
```sh
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for pfox1995/pest-detector-deploy to start chatting
```
Install Unsloth Studio (Windows)
```powershell
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for pfox1995/pest-detector-deploy to start chatting
```
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for pfox1995/pest-detector-deploy to start chatting
Load model with FastModel
```sh
pip install unsloth
```
```python
from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name="pfox1995/pest-detector-deploy",
    max_seq_length=2048,
)
```
| #!/usr/bin/env python3 | |
| """Reference inference script for the Korean pest detector LoRA. | |
| This is the validated deployment recipe — every gotcha we hit during | |
| the export rabbit-hole is encoded here. See README.md for the full list. | |
| Usage: | |
| # Single image | |
| python inference.py path/to/image.jpg | |
| # Glob (--bench compares against parent dir as ground truth) | |
| python inference.py 'val/*/*.jpg' --bench | |
| # 4-bit (8.7 GB VRAM) | |
| python inference.py path/to/image.jpg --4bit | |
| """ | |
| import argparse | |
| import glob | |
| import os | |
| import sys | |
| import time | |
| from pathlib import Path | |
| import torch | |
| from PIL import Image | |
| # ─── Constants from training ───────────────────────────────────────────── | |
# Closed label set used to normalise model output in classify(); in --bench
# mode the parent directory name of each image is compared against these.
# NOTE(review): the non-ASCII literals in this block appear mis-decoded
# (UTF-8 Korean rendered through a single-byte codepage). They are kept
# verbatim here; restore them from the original inference.py before use.
PEST_CLASSES = [
    "κ²κ±°μΈλ―Έλ°€λλ°©", "κ½λ Έλμ΄μ±λ²λ ", "λ΄λ°°κ°λ£¨μ΄", "λ΄λ°°κ±°μΈλ―Έλλ°©",
    "λ΄λ°°λλ°©", "λλλλ°©", "λ¨Ήλ Έλ¦°μ¬", "λͺ©νλ°λλͺ λλ°©", "무μλ²",
    "λ°°μΆμ’λλ°©", "λ°°μΆν°λλΉ", "벼룩μλ²λ ", "λΉλ¨λ Έλ¦°μ¬", "μ©λ©λλ¬΄λ Έλ¦°μ¬",
    "μλ½μμΌλ Έλ¦°μ¬", "μ μ", "ν°28μ λ°μ΄λ¬΄λΉλ²λ ", "ν±λ€λ¦¬κ°λ―Ένλ¦¬λ Έλ¦°μ¬",
    "νλ°€λλ°©",
]
# System turn sent with every request in classify().
SYSTEM_MSG = (
    "λΉμ μ μλ¬Ό ν΄μΆ© μλ³ μ λ¬Έκ°μ λλ€. "
    "μ¬μ§μ λ³΄κ³ ν΄μΆ©μ μ΄λ¦λ§ νκ΅μ΄λ‘ λ΅νμΈμ. "
    'ν΄μΆ©μ΄ μμΌλ©΄ "μ μ"μ΄λΌκ³ λ§ λ΅νμΈμ. '
    "λΆκ° μ€λͺ μμ΄ μ΄λ¦λ§ μΆλ ₯νμΈμ."
)
# Text part of the user turn that accompanies the image in classify().
USER_PROMPT = "μ΄ μ¬μ§μ μλ ν΄μΆ©μ μ΄λ¦μ μλ €μ£ΌμΈμ."
# Side length, in pixels, of the square canvas produced by letterbox().
LETTERBOX_SIZE = 512
# RGB colour used to pad the canvas around the resized image.
LETTERBOX_FILL = (128, 128, 128)
| # ─── Image preprocessing (matches training) ────────────────────────────── | |
def letterbox(img: Image.Image, size: int = LETTERBOX_SIZE) -> Image.Image:
    """Resize *img* to fit a ``size`` x ``size`` square and pad the remainder.

    The image is converted to RGB, scaled so its longer side equals ``size``
    (aspect ratio preserved, LANCZOS resampling) and pasted centred onto a
    canvas filled with ``LETTERBOX_FILL``.

    Args:
        img: Source image in any PIL mode.
        size: Side length of the square output canvas, in pixels.

    Returns:
        A new RGB image of exactly ``size`` x ``size`` pixels.

    Raises:
        ValueError: If the source image has a zero-length side.
    """
    img = img.convert("RGB")
    w, h = img.size
    if w <= 0 or h <= 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError(f"cannot letterbox an empty image (size={w}x{h})")
    scale = size / max(w, h)
    # Clamp to 1 px: for very elongated images the short side rounds to 0,
    # which Image.resize rejects. Ordinary images are unaffected.
    nw = max(1, int(round(w * scale)))
    nh = max(1, int(round(h * scale)))
    resized = img.resize((nw, nh), Image.Resampling.LANCZOS)
    canvas = Image.new("RGB", (size, size), LETTERBOX_FILL)
    # Centre the resized image; the fill colour shows in the padding bands.
    canvas.paste(resized, ((size - nw) // 2, (size - nh) // 2))
    return canvas
| # ─── Model loading (the working setup) ─────────────────────────────────── | |
def load_model(base_id: str, adapter: str, four_bit: bool = False):
    """Load the base model, attach the LoRA adapter and switch to inference.

    The base is loaded through ``unsloth.FastVisionModel`` and the adapter is
    attached at runtime with ``peft.PeftModel.from_pretrained``. The original
    author's warning is preserved: do NOT call ``merge_and_unload`` here; it
    is reported to silently corrupt the ``linear_attn`` LoRA delta in this
    architecture.

    Args:
        base_id: Model id or path passed to ``FastVisionModel.from_pretrained``.
        adapter: Local adapter directory, or a Hub repo id that is downloaded
            with ``snapshot_download`` when no such directory exists.
        four_bit: Forwarded as ``load_in_4bit`` when loading the base.

    Returns:
        A ``(model, tokenizer)`` tuple ready for generation.
    """
    from unsloth import FastVisionModel
    from peft import PeftModel
    from huggingface_hub import snapshot_download

    def vram_gb() -> float:
        # Currently allocated CUDA memory, in units of 1e9 bytes.
        return torch.cuda.memory_allocated() / 1e9

    print(f"Loading base via FastVisionModel: {base_id} (load_in_4bit={four_bit})", flush=True)
    started = time.time()
    model, tokenizer = FastVisionModel.from_pretrained(base_id, load_in_4bit=four_bit)
    load_secs = time.time() - started
    print(f" loaded in {load_secs:.1f}s; vram={vram_gb():.1f} GB", flush=True)

    # A local directory is used as-is; anything else is treated as a Hub repo id.
    if os.path.isdir(adapter):
        adapter_dir = adapter
    else:
        adapter_dir = snapshot_download(repo_id=adapter)
    print(f"Attaching LoRA: {adapter_dir}", flush=True)
    model = PeftModel.from_pretrained(model, adapter_dir)

    # Required (per the original author): flips an internal mode; without it
    # generation is reported to drift to an 'adge' attractor.
    FastVisionModel.for_inference(model)
    model.eval()
    print(f" ready; vram={vram_gb():.1f} GB", flush=True)
    return model, tokenizer
| # ─── Single-image classification ───────────────────────────────────────── | |
def classify(model, tokenizer, img: Image.Image) -> dict:
    """Classify one image and return the predicted label with timing.

    The image is letterboxed, wrapped in a system + user chat, rendered with
    the chat template and passed to ``model.generate`` on CUDA.

    Args:
        model: Model returned by ``load_model``.
        tokenizer: Tokenizer/processor returned by ``load_model``.
        img: Image to classify.

    Returns:
        A dict with ``"pred"`` (normalised label, or the raw text when no
        known label matches), ``"raw"`` (stripped decoded output) and
        ``"elapsed_s"`` (wall-clock seconds spent in ``generate``).
    """
    boxed = letterbox(img, LETTERBOX_SIZE)
    system_turn = {"role": "system", "content": [{"type": "text", "text": SYSTEM_MSG}]}
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": boxed},
            {"type": "text", "text": USER_PROMPT},
        ],
    }

    # enable_thinking=False must be a DIRECT kwarg; per the original author,
    # chat_template_kwargs={...} is silently ignored by VLM processors.
    prompt = tokenizer.apply_chat_template(
        [system_turn, user_turn],
        add_generation_prompt=True,
        enable_thinking=False,
    )
    batch = tokenizer(boxed, prompt, add_special_tokens=False, return_tensors="pt")
    batch = batch.to("cuda")

    generate_kwargs = {
        # Original author's note: NOT 16+; over-running emits 'adge'.
        "max_new_tokens": 10,
        "use_cache": True,
        # Newline is the natural training-time stop.
        "stop_strings": ["\n"],
        # Use the wrapped text tokenizer when the object exposes one.
        "tokenizer": getattr(tokenizer, "tokenizer", tokenizer),
    }
    started = time.time()
    with torch.inference_mode():
        generated = model.generate(**batch, **generate_kwargs)
    elapsed = time.time() - started

    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = batch["input_ids"].shape[1]
    raw = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()

    # Exact match is the common case; otherwise take the longest known label
    # that the output starts with, falling back to the raw text itself.
    pred = raw
    if raw not in PEST_CLASSES:
        longest_first = sorted(PEST_CLASSES, key=len, reverse=True)
        pred = next((name for name in longest_first if raw.startswith(name)), raw)
    return {"pred": pred, "raw": raw, "elapsed_s": elapsed}
| # ─── CLI ───────────────────────────────────────────────────────────────── | |
def _expand_inputs(patterns: list[str]) -> list[str]:
    """Expand CLI path arguments into existing files, keeping argument order."""
    found: list[str] = []
    for pattern in patterns:
        if any(ch in pattern for ch in "*?["):
            # Glob matches are sorted so runs are reproducible.
            found.extend(sorted(glob.glob(pattern)))
        elif os.path.isfile(pattern):
            found.append(pattern)
        else:
            # Report the dropped path instead of filtering it out silently.
            print(f"warning: not a file, skipping: {pattern}", file=sys.stderr)
    # Globs may also match directories; keep regular files only.
    return [f for f in found if os.path.isfile(f)]


def _print_accuracy(correct: int, per_class: dict[str, list[int]]) -> None:
    """Print overall accuracy, then per-class rows sorted best-first."""
    total = sum(t for _, t in per_class.values())
    print(f"\n=== ACCURACY: {correct}/{total} = {100*correct/total:.1f}% ===")
    for cls, (c, t) in sorted(per_class.items(), key=lambda x: -x[1][0]/max(1, x[1][1])):
        print(f" {c}/{t} {100*c/t:5.1f}% {cls}")


def main():
    """Command-line entry point: classify the given images and print results.

    Positional arguments are image paths or glob patterns. With ``--bench``
    the name of each file's parent directory is treated as the ground-truth
    label and an accuracy summary is printed at the end. Files that cannot
    be opened as images are reported on stderr and skipped.

    Raises:
        SystemExit: If no input file remains after expanding the arguments.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="Image file(s) or glob(s).")
    ap.add_argument("--base", default="unsloth/Qwen3.5-9B")
    ap.add_argument("--adapter", default="pfox1995/pest-detector-final")
    ap.add_argument("--bench", action="store_true",
                    help="Treat parent dir name as truth; print accuracy.")
    ap.add_argument("--4bit", dest="four_bit", action="store_true",
                    help="Load base in bnb NF4 (~8.7 GB VRAM, no accuracy loss for this task).")
    args = ap.parse_args()

    files = _expand_inputs(args.paths)
    if not files:
        sys.exit("no input files")

    model, tokenizer = load_model(args.base, args.adapter, args.four_bit)

    correct = 0
    per_class: dict[str, list[int]] = {}  # label -> [n_correct, n_seen]
    for f in files:
        # An empty parent name (file given without a directory) yields no
        # truth label, so that file is classified but not scored.
        truth = Path(f).parent.name if args.bench else None
        try:
            with Image.open(f) as raw:
                out = classify(model, tokenizer, raw)
        except OSError as err:
            # One unreadable or corrupt image must not abort the whole batch.
            print(f"skipping unreadable image {f}: {err}", file=sys.stderr)
            continue

        ok = bool(truth) and out["pred"] == truth
        if truth:
            # Distinct markers so hits and misses can be told apart.
            marker = "✓" if ok else "✗"
            tally = per_class.setdefault(truth, [0, 0])
            tally[0] += int(ok)
            tally[1] += 1
            correct += int(ok)
        else:
            marker = " "
        print(f"{marker} pred={out['pred']:<20s} ({out['elapsed_s']:.1f}s)"
              f"{' truth=' + truth if truth else ''} [{Path(f).name}]")

    if args.bench and per_class:
        _print_accuracy(correct, per_class)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()