Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import logging | |
| from pathlib import Path | |
| from typing import Optional | |
| import typer | |
| from .pipeline import LabelingConfig, run_labeling | |
def _usage_error(message: str) -> typer.Exit:
    """Print *message* to stderr and return the ``typer.Exit(code=1)`` to raise."""
    # Diagnostics go to stderr so stdout stays clean for piping/redirecting.
    typer.echo(message, err=True)
    return typer.Exit(code=1)


def run(
    input_path: Optional[Path] = typer.Option(
        None,
        "--input",
        help="Path to manifest.jsonl or manifest.parquet",
    ),
    db_path: Optional[Path] = typer.Option(
        None,
        "--db",
        help="Path to SQLite db.sqlite (used when --input is not provided)",
    ),
    outdir: Optional[Path] = typer.Option(
        None,
        "--outdir",
        help="Output directory for labels (default: input/db parent + /labels)",
    ),
    model: str = typer.Option(
        "Qwen/Qwen2-VL-2B-Instruct",
        "--model",
        help="Model ID",
    ),
    device: str = typer.Option(
        "auto",
        "--device",
        help="Device string (auto, cpu, cuda, cuda:0)",
    ),
    precision: str = typer.Option(
        "auto",
        "--precision",
        help="auto|fp16|bf16|fp32",
    ),
    batch_size: int = typer.Option(8, "--batch-size", help="Batch size"),
    upscale: int = typer.Option(1, "--upscale", help="Upscale factor (e.g. 2 or 4)"),
    alpha_bg: str = typer.Option(
        "white",
        "--alpha-bg",
        help="Background for transparent PNGs: white|black|none",
    ),
    resume: bool = typer.Option(False, "--resume", help="Resume from existing labels.jsonl"),
    lang: str = typer.Option("ko", "--lang", help="ko|en|both"),
    only_source: str = typer.Option(
        "all",
        "--only-source",
        help="equipment_shape|cash|all",
    ),
    max_samples: Optional[int] = typer.Option(
        None,
        "--max-samples",
        help="Limit number of samples (for testing)",
    ),
    no_image: bool = typer.Option(False, "--no-image", help="Use metadata only"),
    no_metadata: bool = typer.Option(False, "--no-metadata", help="Use image only"),
    log_level: str = typer.Option("info", "--log-level", help="info|debug"),
    parquet: bool = typer.Option(False, "--parquet", help="Write labels.parquet"),
    load_4bit: bool = typer.Option(False, "--load-4bit", help="Enable 4-bit quantization"),
    max_new_tokens: int = typer.Option(384, "--max-new-tokens", help="Max new tokens"),
    quality_retry: bool = typer.Option(
        False,
        "--quality-retry/--no-quality-retry",
        help="Retry once with a stricter prompt when output is low quality",
    ),
    run_id: Optional[str] = typer.Option(
        None,
        "--run-id",
        help="Filter DB inputs by run_id",
    ),
) -> None:
    """Generate CLIP-ready labels for MapleStory item icons."""
    # NOTE: the docstring above is deliberately left as a single line. Typer
    # uses a command's docstring as its --help text, so longer documentation
    # lives in comments instead.
    #
    # Flow: configure logging, validate the options, pick the output
    # directory, bundle everything into a LabelingConfig and hand it to
    # run_labeling(). Every validation failure prints one line to stderr and
    # exits with status 1.
    logging.basicConfig(level=_parse_log_level(log_level), format="%(levelname)s: %(message)s")

    # At least one input source is required; --db is the fallback for --input.
    if input_path is None and db_path is None:
        raise _usage_error("Provide --input or --db")

    # Closed-set string options: (given value, accepted values, error text).
    # Checked in this order so the first offending option is the one reported.
    choice_checks = (
        (alpha_bg, {"white", "black", "none"}, "--alpha-bg must be white, black, or none"),
        (lang, {"ko", "en", "both"}, "--lang must be ko, en, or both"),
        (
            only_source,
            {"equipment_shape", "cash", "all"},
            "--only-source must be equipment_shape, cash, or all",
        ),
        (
            precision,
            {"auto", "fp16", "bf16", "fp32"},
            "--precision must be auto, fp16, bf16, or fp32",
        ),
    )
    for given, accepted, message in choice_checks:
        if given not in accepted:
            raise _usage_error(message)

    # Counts and factors below 1 cannot describe a real batch, scale or
    # generation length; reject them here instead of deep inside the pipeline.
    minimum_checks = (
        (batch_size, "--batch-size must be at least 1"),
        (upscale, "--upscale must be at least 1"),
        (max_new_tokens, "--max-new-tokens must be at least 1"),
    )
    for number, message in minimum_checks:
        if number < 1:
            raise _usage_error(message)

    # Per their help texts each flag drops one of the two prompt inputs
    # ("Use metadata only" / "Use image only"), so combining them would leave
    # nothing to label from.
    if no_image and no_metadata:
        raise _usage_error("--no-image and --no-metadata cannot be combined")

    # Default output location: a "labels" directory next to whichever input
    # source was supplied, preferring --input over --db.
    if outdir is not None:
        resolved_outdir = outdir
    else:
        source_path = input_path if input_path is not None else db_path
        resolved_outdir = source_path.parent / "labels"

    config = LabelingConfig(
        input_path=input_path,
        db_path=db_path,
        outdir=resolved_outdir,
        model_id=model,
        device=device,
        precision=precision,
        batch_size=batch_size,
        upscale=upscale,
        alpha_bg=alpha_bg,
        resume=resume,
        lang=lang,
        only_source=only_source,
        max_samples=max_samples,
        no_image=no_image,
        no_metadata=no_metadata,
        log_level=log_level,
        parquet=parquet,
        load_4bit=load_4bit,
        max_new_tokens=max_new_tokens,
        run_id=run_id,
        quality_retry=quality_retry,
    )
    run_labeling(config)
def _parse_log_level(value: str) -> int:
    """Translate a log-level name into the matching ``logging`` constant.

    Matching ignores case and surrounding whitespace. The standard level
    names ``debug``, ``info``, ``warning`` (alias ``warn``), ``error`` and
    ``critical`` are recognised.

    Args:
        value: Level name, e.g. ``"info"`` or ``"DEBUG"``.

    Returns:
        The numeric ``logging`` level. Unrecognised names fall back to
        ``logging.INFO`` rather than raising.
    """
    known_levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }
    return known_levels.get(value.strip().lower(), logging.INFO)