gyubin02's picture
Improve label quality prompts and retry
720b9ad
from __future__ import annotations
import logging
from pathlib import Path
from typing import Optional
import typer
from .pipeline import LabelingConfig, run_labeling
# CLI entry point for the labeling pipeline.
#
# NOTE: the docstring inside ``run`` is deliberately kept to a single line.
# Typer uses a command's docstring as its ``--help`` text (this function is
# presumably registered as a Typer command elsewhere -- confirm), so longer
# maintainer notes live in ``#`` comments instead.
#
# Flow: configure logging -> validate options -> resolve the output directory
# -> build a ``LabelingConfig`` -> call ``run_labeling``.
# Exits with status 1 (``typer.Exit``) when an option fails validation.
def run(
    input_path: Optional[Path] = typer.Option(
        None,
        "--input",
        help="Path to manifest.jsonl or manifest.parquet",
    ),
    db_path: Optional[Path] = typer.Option(
        None,
        "--db",
        help="Path to SQLite db.sqlite (used when --input is not provided)",
    ),
    outdir: Optional[Path] = typer.Option(
        None,
        "--outdir",
        help="Output directory for labels (default: input/db parent + /labels)",
    ),
    model: str = typer.Option(
        "Qwen/Qwen2-VL-2B-Instruct",
        "--model",
        help="Model ID",
    ),
    device: str = typer.Option(
        "auto",
        "--device",
        help="Device string (auto, cpu, cuda, cuda:0)",
    ),
    precision: str = typer.Option(
        "auto",
        "--precision",
        help="auto|fp16|bf16|fp32",
    ),
    batch_size: int = typer.Option(8, "--batch-size", help="Batch size"),
    upscale: int = typer.Option(1, "--upscale", help="Upscale factor (e.g. 2 or 4)"),
    alpha_bg: str = typer.Option(
        "white",
        "--alpha-bg",
        help="Background for transparent PNGs: white|black|none",
    ),
    resume: bool = typer.Option(False, "--resume", help="Resume from existing labels.jsonl"),
    lang: str = typer.Option("ko", "--lang", help="ko|en|both"),
    only_source: str = typer.Option(
        "all",
        "--only-source",
        help="equipment_shape|cash|all",
    ),
    max_samples: Optional[int] = typer.Option(
        None,
        "--max-samples",
        help="Limit number of samples (for testing)",
    ),
    no_image: bool = typer.Option(False, "--no-image", help="Use metadata only"),
    no_metadata: bool = typer.Option(False, "--no-metadata", help="Use image only"),
    log_level: str = typer.Option("info", "--log-level", help="info|debug"),
    parquet: bool = typer.Option(False, "--parquet", help="Write labels.parquet"),
    load_4bit: bool = typer.Option(False, "--load-4bit", help="Enable 4-bit quantization"),
    max_new_tokens: int = typer.Option(384, "--max-new-tokens", help="Max new tokens"),
    quality_retry: bool = typer.Option(
        False,
        "--quality-retry/--no-quality-retry",
        help="Retry once with a stricter prompt when output is low quality",
    ),
    run_id: Optional[str] = typer.Option(
        None,
        "--run-id",
        help="Filter DB inputs by run_id",
    ),
) -> None:
    """Generate CLIP-ready labels for MapleStory item icons."""
    logging.basicConfig(
        level=_parse_log_level(log_level),
        format="%(levelname)s: %(message)s",
    )

    def _fail(message: str) -> None:
        # Diagnostics go to stderr (same stream logging.basicConfig uses by
        # default) so they never mix with anything written to stdout.
        typer.echo(message, err=True)
        raise typer.Exit(code=1)

    # At least one input source is required; --db is only consulted when
    # --input is absent (see the --db help text).
    if input_path is None and db_path is None:
        _fail("Provide --input or --db")

    # Closed-set options, checked in a fixed order so the first offending
    # option is the one reported.
    choice_checks = (
        (alpha_bg, ("white", "black", "none"), "--alpha-bg must be white, black, or none"),
        (lang, ("ko", "en", "both"), "--lang must be ko, en, or both"),
        (
            only_source,
            ("equipment_shape", "cash", "all"),
            "--only-source must be equipment_shape, cash, or all",
        ),
        (
            precision,
            ("auto", "fp16", "bf16", "fp32"),
            "--precision must be auto, fp16, bf16, or fp32",
        ),
    )
    for chosen, allowed, message in choice_checks:
        if chosen not in allowed:
            _fail(message)

    # Fail fast on non-positive sizes instead of letting them surface as an
    # obscure error (or silent misbehaviour) inside the pipeline.
    minimum_checks = (
        (batch_size, "--batch-size must be at least 1"),
        (upscale, "--upscale must be at least 1"),
        (max_new_tokens, "--max-new-tokens must be at least 1"),
    )
    for amount, message in minimum_checks:
        if amount < 1:
            _fail(message)

    # Default output location: a "labels" directory next to the input file,
    # or next to the database when no input file was given.
    if outdir is not None:
        resolved_outdir = outdir
    else:
        anchor = input_path if input_path is not None else db_path
        resolved_outdir = anchor.parent / "labels"

    config = LabelingConfig(
        input_path=input_path,
        db_path=db_path,
        outdir=resolved_outdir,
        model_id=model,
        device=device,
        precision=precision,
        batch_size=batch_size,
        upscale=upscale,
        alpha_bg=alpha_bg,
        resume=resume,
        lang=lang,
        only_source=only_source,
        max_samples=max_samples,
        no_image=no_image,
        no_metadata=no_metadata,
        log_level=log_level,
        parquet=parquet,
        load_4bit=load_4bit,
        max_new_tokens=max_new_tokens,
        run_id=run_id,
        quality_retry=quality_retry,
    )
    run_labeling(config)
def _parse_log_level(value: str) -> int:
    """Translate a log-level name into the matching ``logging`` constant.

    Matching is case-insensitive and ignores surrounding whitespace. The
    standard level names (debug, info, warning/warn, error, critical) are
    recognised; any other value falls back to ``logging.INFO`` rather than
    raising, so an unexpected string never prevents the program from starting.

    Args:
        value: Level name, e.g. ``"debug"`` or ``"INFO"``.

    Returns:
        The numeric ``logging`` level.
    """
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }
    return levels.get(value.strip().lower(), logging.INFO)