Spaces:

gyubin02
/

maple-data

Sleeping

e3b104d 3 months ago

4.55 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Optional

	# Identifier for this revision of the prompt text.
	PROMPT_VERSION = "v2"

	# Sentence appended to a system prompt to form its "strict" variant.
	_STRICT_JSON_SUFFIX = " Output must be valid JSON with double quotes and no trailing commas."

	# Labeling rules, one sentence per entry. They are joined with single spaces,
	# so no entry carries leading or trailing whitespace.
	_BASE_RULES = (
	    "You are generating labels for MapleStory item icons for CLIP training.",
	    "Return a single JSON object only.",
	    "Do not output markdown or extra text.",
	    "Include item_name verbatim in label_ko, but do not make label_ko identical to item_name.",
	    "Use image evidence first; use metadata only when it is visible.",
	    "Avoid guessing.",
	    "If uncertain, set quality_flags.is_uncertain=true and add reasons (e.g. low_visual_signal).",
	    (
	        "label_ko must be one short Korean sentence with at least two visual descriptors"
	        " (color/material/shape/theme/vibe) plus the item name."
	    ),
	    "Do not list words with commas; write a single natural descriptive sentence.",
	    "Describe visible color/material/shape first, then attach the item name naturally.",
	    "Do not repeat synonyms (e.g. 검은색, 블랙) in the same label.",
	    (
	        "tags_ko must be 5-15 short Korean keywords with at least two visual descriptors"
	        " and should not be only item_name tokens."
	    ),
	    "query_variants_ko must be 3-8 natural Korean search queries.",
	    "attributes must include colors/theme/material/vibe lists and item_type_guess string or null.",
	    "If label_en is not requested, set it to null.",
	)

	# Extra rules that turn the base prompt into the quality-repair prompt.
	_QUALITY_RULES = (
	    "Repair low-quality outputs by increasing visual specificity without guessing.",
	    "Ensure attributes lists are populated when visible; otherwise set low_visual_signal.",
	)

	# Base system prompt: the labeling rules as one space-separated paragraph.
	SYSTEM_PROMPT_BASE = " ".join(_BASE_RULES)

	# Base prompt plus the strict-JSON sentence.
	SYSTEM_PROMPT_STRICT = SYSTEM_PROMPT_BASE + _STRICT_JSON_SUFFIX

	# Base prompt followed by the quality-repair rules.
	SYSTEM_PROMPT_QUALITY = " ".join((SYSTEM_PROMPT_BASE, *_QUALITY_RULES))

	# Quality-repair prompt plus the strict-JSON sentence.
	SYSTEM_PROMPT_QUALITY_STRICT = SYSTEM_PROMPT_QUALITY + _STRICT_JSON_SUFFIX


	@dataclass
	class PromptInputs:
	    """Per-item values read by ``build_user_prompt`` when rendering a prompt.

	    The dataclass-generated ``__init__`` accepts the fields in the order
	    they are declared here.
	    """

	    # Written as the ``item_name`` line when metadata is included.
	    item_name: str
	    # Written as the ``item_description`` line; a falsy value renders "(none)".
	    item_description: Optional[str]
	    # Written as the ``item_part`` line; a falsy value renders "(none)".
	    item_part: Optional[str]
	    # Written as the ``source_type`` line whether or not metadata is included.
	    source_type: str
	    # Selects between the "image: provided" and "image: not provided" lines.
	    include_image: bool
	    # When False, the item fields are replaced by "metadata: (not provided)".
	    include_metadata: bool
	    # Written as the ``language`` line.
	    lang: str


	def build_user_prompt(inputs: PromptInputs) -> str:
	    """Render the user prompt for one item as newline-separated lines.

	    With ``inputs.include_metadata`` set, the prompt opens with the
	    ``item_name``, ``item_description`` and ``item_part`` lines, where a
	    falsy description or part is shown as ``(none)``. Otherwise it opens
	    with a single ``metadata: (not provided)`` line. The ``source_type``,
	    ``image``, ``language`` and output-keys lines follow in both cases.

	    Args:
	        inputs: Item values and flags to render.

	    Returns:
	        The prompt text, with lines joined by ``"\\n"`` and no trailing
	        newline.
	    """
	    if inputs.include_metadata:
	        description = inputs.item_description or "(none)"
	        part = inputs.item_part or "(none)"
	        header = [
	            f"item_name: {inputs.item_name}",
	            f"item_description: {description}",
	            f"item_part: {part}",
	        ]
	    else:
	        header = ["metadata: (not provided)"]

	    image_status = "provided" if inputs.include_image else "not provided (metadata-only)"

	    return "\n".join(
	        [
	            *header,
	            f"source_type: {inputs.source_type}",
	            f"image: {image_status}",
	            f"language: {inputs.lang}",
	            (
	                "Return JSON with keys: label_ko, label_en, tags_ko, attributes, "
	                "query_variants_ko, quality_flags."
	            ),
	        ]
	    )


	def build_messages(
	    user_prompt: str, include_image: bool, strict: bool
	) -> list[dict[str, object]]:
	    """Build the two-message (system, user) list for a labeling request.

	    Args:
	        user_prompt: Text placed in the user message's text part.
	        include_image: When true, an ``{"type": "image"}`` part is placed
	            before the text part. No image data is attached here.
	        strict: Selects ``SYSTEM_PROMPT_STRICT`` instead of
	            ``SYSTEM_PROMPT_BASE`` as the system message content.

	    Returns:
	        ``[system_message, user_message]``. The system content is a string
	        and the user content is a list of part dicts.
	    """
	    user_parts: list[dict[str, object]] = [{"type": "text", "text": user_prompt}]
	    if include_image:
	        # The image placeholder always precedes the text part.
	        user_parts.insert(0, {"type": "image"})

	    if strict:
	        system_text = SYSTEM_PROMPT_STRICT
	    else:
	        system_text = SYSTEM_PROMPT_BASE

	    system_message = {"role": "system", "content": system_text}
	    user_message = {"role": "user", "content": user_parts}
	    return [system_message, user_message]


	def build_quality_prompt(inputs: PromptInputs) -> str:
	    """Render the user prompt for a quality-repair pass.

	    The result is the output of ``build_user_prompt(inputs)`` followed by a
	    ``quality_mode`` line and a ``requirements`` line, each separated by a
	    newline.

	    Args:
	        inputs: Item values and flags, passed through to
	            ``build_user_prompt``.

	    Returns:
	        The extended prompt text with no trailing newline.
	    """
	    requirements = (
	        "requirements: label_ko includes item_name + >=2 visual descriptors; "
	        "tags_ko include >=2 visual descriptors; fill attributes when visible; "
	        "if not, set quality_flags.is_uncertain=true with reason low_visual_signal."
	    )
	    return "\n".join(
	        (
	            build_user_prompt(inputs),
	            "quality_mode: improve visual specificity",
	            requirements,
	        )
	    )


	def build_quality_messages(user_prompt: str, include_image: bool) -> list[dict[str, object]]:
	    """Build the two-message (system, user) list for a quality-repair request.

	    The system message always carries ``SYSTEM_PROMPT_QUALITY_STRICT``.

	    Args:
	        user_prompt: Text placed in the user message's text part.
	        include_image: When true, an ``{"type": "image"}`` part is placed
	            before the text part. No image data is attached here.

	    Returns:
	        ``[system_message, user_message]``. The system content is a string
	        and the user content is a list of part dicts.
	    """
	    # Zero or one placeholder parts, unpacked ahead of the text part.
	    image_prefix: list[dict[str, object]] = [{"type": "image"}] if include_image else []
	    user_parts: list[dict[str, object]] = [
	        *image_prefix,
	        {"type": "text", "text": user_prompt},
	    ]
	    return [
	        {"role": "system", "content": SYSTEM_PROMPT_QUALITY_STRICT},
	        {"role": "user", "content": user_parts},
	    ]