Spaces:

gyubin02
/

maple-data

Sleeping

App Files Files Community

maple-data / src /labeler /cli.py

gyubin02

Improve label quality prompts and retry

720b9ad 18 days ago

raw

history blame contribute delete

4.48 kB

	from __future__ import annotations

	import logging
	from pathlib import Path
	from typing import Optional

	import typer

	from .pipeline import LabelingConfig, run_labeling


	def run(
	    input_path: Optional[Path] = typer.Option(
	        None,
	        "--input",
	        help="Path to manifest.jsonl or manifest.parquet",
	    ),
	    db_path: Optional[Path] = typer.Option(
	        None,
	        "--db",
	        help="Path to SQLite db.sqlite (used when --input is not provided)",
	    ),
	    outdir: Optional[Path] = typer.Option(
	        None,
	        "--outdir",
	        help="Output directory for labels (default: input/db parent + /labels)",
	    ),
	    model: str = typer.Option(
	        "Qwen/Qwen2-VL-2B-Instruct",
	        "--model",
	        help="Model ID",
	    ),
	    device: str = typer.Option(
	        "auto",
	        "--device",
	        help="Device string (auto, cpu, cuda, cuda:0)",
	    ),
	    precision: str = typer.Option(
	        "auto",
	        "--precision",
	        help="auto|fp16|bf16|fp32",
	    ),
	    batch_size: int = typer.Option(8, "--batch-size", help="Batch size"),
	    upscale: int = typer.Option(1, "--upscale", help="Upscale factor (e.g. 2 or 4)"),
	    alpha_bg: str = typer.Option(
	        "white",
	        "--alpha-bg",
	        help="Background for transparent PNGs: white|black|none",
	    ),
	    resume: bool = typer.Option(False, "--resume", help="Resume from existing labels.jsonl"),
	    lang: str = typer.Option("ko", "--lang", help="ko|en|both"),
	    only_source: str = typer.Option(
	        "all",
	        "--only-source",
	        help="equipment_shape|cash|all",
	    ),
	    max_samples: Optional[int] = typer.Option(
	        None,
	        "--max-samples",
	        help="Limit number of samples (for testing)",
	    ),
	    no_image: bool = typer.Option(False, "--no-image", help="Use metadata only"),
	    no_metadata: bool = typer.Option(False, "--no-metadata", help="Use image only"),
	    log_level: str = typer.Option("info", "--log-level", help="info|debug"),
	    parquet: bool = typer.Option(False, "--parquet", help="Write labels.parquet"),
	    load_4bit: bool = typer.Option(False, "--load-4bit", help="Enable 4-bit quantization"),
	    max_new_tokens: int = typer.Option(384, "--max-new-tokens", help="Max new tokens"),
	    quality_retry: bool = typer.Option(
	        False,
	        "--quality-retry/--no-quality-retry",
	        help="Retry once with a stricter prompt when output is low quality",
	    ),
	    run_id: Optional[str] = typer.Option(
	        None,
	        "--run-id",
	        help="Filter DB inputs by run_id",
	    ),
	) -> None:
	    """Generate CLIP-ready labels for MapleStory item icons."""
	    # NOTE: the docstring above is deliberately a single line. If this function
	    # is registered as a Typer command, Typer shows the docstring as the
	    # command's --help text, so longer documentation lives in comments.
	    #
	    # Flow: configure logging, validate the options, work out the output
	    # directory, bundle everything into a LabelingConfig and hand it to
	    # run_labeling(). Any validation failure prints a message to stderr and
	    # exits with status 1.

	    logging.basicConfig(level=_parse_log_level(log_level), format="%(levelname)s: %(message)s")

	    # At least one data source is required.
	    if input_path is None and db_path is None:
	        typer.echo("Provide --input or --db", err=True)
	        raise typer.Exit(code=1)

	    # Closed-set options, checked in a fixed order so the first offending
	    # option is the one that gets reported.
	    choice_checks = (
	        (alpha_bg, ("white", "black", "none"), "--alpha-bg must be white, black, or none"),
	        (lang, ("ko", "en", "both"), "--lang must be ko, en, or both"),
	        (
	            only_source,
	            ("equipment_shape", "cash", "all"),
	            "--only-source must be equipment_shape, cash, or all",
	        ),
	        (
	            precision,
	            ("auto", "fp16", "bf16", "fp32"),
	            "--precision must be auto, fp16, bf16, or fp32",
	        ),
	    )
	    for chosen, allowed, message in choice_checks:
	        if chosen not in allowed:
	            typer.echo(message, err=True)
	            raise typer.Exit(code=1)

	    # Default output directory: a "labels" folder next to the input manifest,
	    # or next to the database when no manifest was given.
	    if outdir is not None:
	        resolved_outdir = outdir
	    elif input_path is not None:
	        resolved_outdir = input_path.parent / "labels"
	    else:
	        # db_path cannot be None here: the source check above already exited.
	        resolved_outdir = db_path.parent / "labels"

	    config = LabelingConfig(
	        input_path=input_path,
	        db_path=db_path,
	        outdir=resolved_outdir,
	        model_id=model,
	        device=device,
	        precision=precision,
	        batch_size=batch_size,
	        upscale=upscale,
	        alpha_bg=alpha_bg,
	        resume=resume,
	        lang=lang,
	        only_source=only_source,
	        max_samples=max_samples,
	        no_image=no_image,
	        no_metadata=no_metadata,
	        log_level=log_level,
	        parquet=parquet,
	        load_4bit=load_4bit,
	        max_new_tokens=max_new_tokens,
	        run_id=run_id,
	        quality_retry=quality_retry,
	    )

	    run_labeling(config)


	def _parse_log_level(value: str) -> int:
	value = value.lower()
	if value == "debug":
	return logging.DEBUG
	return logging.INFO