# File: sovyn-300m-cortex/src/sovyn/formatting.py
# Uploaded by SOVYN using huggingface_hub (commit 681909f, verified).
# Tag strings used by this module's prompt layout. The order of the list is
# kept exactly as declared, since consumers may rely on positional indices.
SPECIAL_TOKENS = [
    # Not emitted as section headers by the formatters in this file
    # ("<eos>" is appended by format_example as the final line).
    "<pad>", "<bos>", "<eos>",
    # Section headers written by format_example / format_prompt.
    "<system>", "<user>", "<state>", "<plan>",
    "<memory>", "<reflection>", "<assistant>",
]
def format_example(example: dict) -> str:
    """Render one example as newline-separated, tag-prefixed sections.

    The output always contains the ``<system>``, ``<user>``, ``<state>`` and
    ``<plan>`` sections (in that order), then ``<memory>`` and
    ``<reflection>`` only when their values are non-empty, and finally
    ``<assistant>`` followed by a closing ``<eos>`` line.

    Args:
        example: Mapping that may contain the keys ``system``, ``user``,
            ``state``, ``plan``, ``memory``, ``reflection`` and
            ``assistant``. A missing key or a ``None`` value is rendered as
            an empty string.

    Returns:
        The sections joined with ``"\\n"``.

    Raises:
        TypeError: If a rendered value is neither ``None`` nor a ``str``
            (raised by ``str.join``).
    """

    def field(key: str) -> str:
        # ``dict.get(key, "")`` only covers a *missing* key. A key that is
        # present with an explicit ``None`` (e.g. JSON null) would otherwise
        # reach ``str.join`` and raise TypeError.
        value = example.get(key)
        return "" if value is None else value

    parts: list[str] = []

    # Sections that are always emitted, even when empty.
    for key in ("system", "user", "state", "plan"):
        parts += [f"<{key}>", field(key)]

    # Optional sections: skipped entirely when the value is empty.
    for key in ("memory", "reflection"):
        text = field(key)
        if text:
            parts += [f"<{key}>", text]

    parts += ["<assistant>", field("assistant"), "<eos>"]
    return "\n".join(parts)
def format_prompt(user: str, system: str | None = None) -> str:
    """Build the inference prompt for a single user message.

    The prompt has the ``<system>``, ``<user>``, ``<state>`` and ``<plan>``
    sections, each tag followed by its text on the next line, and ends with a
    bare ``<assistant>`` tag (no ``<eos>``). The state and plan texts come
    from ``infer_state_plan(user)``.

    Args:
        user: The user's message, inserted unchanged.
        system: System prompt text. When ``None``, the built-in default
            prompt below is used.

    Returns:
        The prompt lines joined with ``"\\n"``.
    """
    # NOTE(review): the non-ASCII text in the default prompt looks
    # mis-encoded in this copy of the file; it is reproduced unchanged here.
    # Confirm against the upstream source.
    if system is None:
        system = (
            "๋„ˆ๋Š” SOVYN์ด๋‹ค. ์‚ฌ์šฉ์ž์˜ ๋ง์— ๋จผ์ € ์ •ํ™•ํžˆ ๋ฐ˜์‘ํ•˜๊ณ , ์งง๊ณ  ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•œ๋‹ค. "
            "๋ชจ๋ฅด๋ฉด ์•„๋Š” ์ฒ™ํ•˜์ง€ ์•Š๊ณ , ๋‹ค์Œ์— ํ•  ์ˆ˜ ์žˆ๋Š” ์ž‘์€ ํ–‰๋™์„ ์ œ์•ˆํ•œ๋‹ค."
        )

    state, plan = infer_state_plan(user)

    sections = (
        ("<system>", system),
        ("<user>", user),
        ("<state>", state),
        ("<plan>", plan),
    )
    lines = [piece for section in sections for piece in section]
    # The prompt stops at the open assistant tag.
    lines.append("<assistant>")
    return "\n".join(lines)
# Ordered rule table for infer_state_plan: (keywords, state text, plan text).
# Rules are tried top to bottom and the first rule with any keyword occurring
# as a substring of the normalised message wins, so the order is significant.
# NOTE(review): the non-ASCII literals look mis-encoded in this copy of the
# file; they are reproduced unchanged. Confirm against the upstream source.
_STATE_PLAN_RULES: tuple[tuple[tuple[str, ...], str, str], ...] = (
    (
        ("ํ”ผ๊ณค", "์ง€์ณค", "ํž˜๋“ค", "๊ธฐ์šด", "๋ณต์žก"),
        "์‚ฌ์šฉ์ž๋Š” ํ”ผ๋กœ์™€ ๋ถ€๋‹ด์„ ํ‘œํ˜„ํ–ˆ๋‹ค.",
        "๋จผ์ € ๊ณต๊ฐํ•˜๊ณ , ๋ถ€๋‹ด ์—†๋Š” ์งˆ๋ฌธ์„ ํ•œ๋‹ค.",
    ),
    (
        ("์งง", "๊ฐ„๋‹จ", "ํ•ต์‹ฌ", "์š”์•ฝ", "๊ธธ๊ฒŒ"),
        "์‚ฌ์šฉ์ž๋Š” ์งง๊ณ  ํ•ต์‹ฌ์ ์ธ ๋‹ต๋ณ€์„ ์›ํ•œ๋‹ค.",
        "๋ถˆํ•„์š”ํ•œ ์„ค๋ช…์„ ์ค„์ด๊ณ  ํ•ต์‹ฌ๋ถ€ํ„ฐ ๋‹ตํ•œ๋‹ค.",
    ),
    (
        ("๋ˆ„๊ตฌ", "์ •์ฒด", "sovyn", "ai์•ผ"),
        "์‚ฌ์šฉ์ž๋Š” SOVYN์˜ ์ •์ฒด์„ฑ์„ ๋ฌป๊ณ  ์žˆ๋‹ค.",
        "SOVYN์˜ ๋ชฉ์ ์„ ์งง๊ณ  ์ž์‹  ์žˆ๊ฒŒ ์†Œ๊ฐœํ•œ๋‹ค.",
    ),
    (
        (
            "1b",
            "120m",
            "300m",
            "๋ชจ๋ธ",
            "ํ•™์Šต",
            "ํ‚ค์šธ",
            "์ €์žฅ๊ณต๊ฐ„",
            "์ €์žฅ ๊ณต๊ฐ„",
            "์ฒดํฌํฌ์ธํŠธ",
            "๋ถ€์กฑ",
        ),
        "์‚ฌ์šฉ์ž๋Š” AI ๋ชจ๋ธ ๊ฐœ๋ฐœ ๋ฐฉํ–ฅ์„ ๋ฌป๊ณ  ์žˆ๋‹ค.",
        "๊ฐ€๋Šฅ์„ฑ์„ ๋งํ•˜๊ณ  ๋‹จ๊ณ„์ ์ธ ๋ฐฉํ–ฅ์„ ์ œ์•ˆํ•œ๋‹ค.",
    ),
    (
        ("๊ณ ๋งˆ์›Œ", "์ข‹๋‹ค", "๊ดœ์ฐฎ", "๋งˆ์Œ"),
        "์‚ฌ์šฉ์ž๋Š” ๊ธ์ •์ ์ธ ๋ฐ˜์‘์„ ๋ณด์˜€๋‹ค.",
        "๊ฐ€๋ณ๊ฒŒ ๋ฐ›์•„๋“ค์ด๊ณ  ๋‹ค์Œ ํ–‰๋™์„ ์—ด์–ด๋‘”๋‹ค.",
    ),
    (
        ("๋ญ๋ถ€ํ„ฐ", "๋‹ค์Œ", "์ˆœ์„œ"),
        "์‚ฌ์šฉ์ž๋Š” ๋‹ค์Œ ํ–‰๋™์„ ์ •ํ•˜๊ณ  ์‹ถ์–ด ํ•œ๋‹ค.",
        "์ž‘๊ณ  ์‹คํ–‰ ๊ฐ€๋Šฅํ•œ ๋‹ค์Œ ๋‹จ๊ณ„๋ฅผ ์ œ์•ˆํ•œ๋‹ค.",
    ),
    (
        ("์ฝ”๋”ฉ", "ํ”„๋กœ์ ํŠธ", "์Šคํฌ๋ฆฝํŠธ", "๋ฒ„๊ทธ", "์—๋Ÿฌ"),
        "์‚ฌ์šฉ์ž๋Š” ๊ฐœ๋ฐœ ์ž‘์—…์„ ํ•˜๊ณ  ์žˆ๋‹ค.",
        "์ƒํ™ฉ์„ ํ™•์ธํ•˜๊ณ  ๋ฐ”๋กœ ๋„์šธ ์ค€๋น„๋ฅผ ํ•œ๋‹ค.",
    ),
    (
        ("๋จน", "์ ์‹ฌ", "์ €๋…", "์Œ์‹"),
        "์‚ฌ์šฉ์ž๋Š” ์‹์‚ฌ ์„ ํƒ์„ ๊ณ ๋ฏผํ•˜๊ณ  ์žˆ๋‹ค.",
        "๊ฐ€๋ณ๊ณ  ์„ ํƒํ•˜๊ธฐ ์‰ฌ์šด ์ถ”์ฒœ์„ ํ•œ๋‹ค.",
    ),
    (
        ("๊ณต๋ถ€", "์ง‘์ค‘", "๋ฏธ๋ฃจ", "๋ฃจํ‹ด"),
        "์‚ฌ์šฉ์ž๋Š” ๊ณต๋ถ€๋‚˜ ์ง‘์ค‘์— ์–ด๋ ค์›€์„ ๋А๋ผ๊ณ  ์žˆ๋‹ค.",
        "์ž‘๊ฒŒ ์‹œ์ž‘ํ•  ์ˆ˜ ์žˆ๋Š” ๋ฐฉ๋ฒ•์„ ์ œ์•ˆํ•œ๋‹ค.",
    ),
    (
        ("์ข‹์•„", "๊ดœ์ฐฎ์•˜", "์ž˜ํ•œ", "๋ฟŒ๋“ฏ"),
        "์‚ฌ์šฉ์ž๋Š” ๊ธ์ •์ ์ธ ๊ฐ์ •์„ ํ‘œํ˜„ํ–ˆ๋‹ค.",
        "๊ธ์ •์ ์ธ ๊ฐ์ •์„ ์ธ์ •ํ•˜๊ณ  ์ด์–ด๊ฐ„๋‹ค.",
    ),
    (
        ("๋ถˆ์•ˆ", "๊ฑฑ์ •", "๋ถˆํŽธ", "์ดˆ์กฐ"),
        "์‚ฌ์šฉ์ž๋Š” ๋ถˆ์•ˆ์ด๋‚˜ ๊ฑฑ์ •์„ ํ‘œํ˜„ํ–ˆ๋‹ค.",
        "์•ˆ์ •๊ฐ์„ ์ฃผ๊ณ  ์ƒ๊ฐ์„ ์ •๋ฆฌํ•˜๋„๋ก ๋•๋Š”๋‹ค.",
    ),
    (
        ("์•„์ด๋””์–ด", "์ด๋ฆ„", "์ปจ์…‰", "๋ฌธ๊ตฌ"),
        "์‚ฌ์šฉ์ž๋Š” ์ฐฝ์ž‘ ์•„์ด๋””์–ด๋ฅผ ์›ํ•œ๋‹ค.",
        "์งง๊ณ  ๋ฐ”๋กœ ์“ธ ์ˆ˜ ์žˆ๋Š” ์•„์ด๋””์–ด๋ฅผ ์ œ์•ˆํ•œ๋‹ค.",
    ),
    (
        ("๋งž์•„", "ํ™•์ธ", "์ƒ๊ฐ ์–ด๋•Œ", "๋ฐฉํ–ฅ"),
        "์‚ฌ์šฉ์ž๋Š” ํŒ๋‹จ์ด๋‚˜ ํ™•์ธ์„ ์›ํ•œ๋‹ค.",
        "๊ธ์ •ํ•  ๋ถ€๋ถ„๊ณผ ํ™•์ธํ•  ๋ถ€๋ถ„์„ ์งง๊ฒŒ ๋‚˜๋ˆˆ๋‹ค.",
    ),
    (
        ("๋Œ€ํ™”", "์‹ฌ์‹ฌ", "ํ•  ๋ง", "์–˜๊ธฐ"),
        "์‚ฌ์šฉ์ž๋Š” ๊ฐ€๋ฒผ์šด ๋Œ€ํ™”๋ฅผ ์›ํ•œ๋‹ค.",
        "๋ถ€๋‹ด ์—†๋Š” ์งˆ๋ฌธ์œผ๋กœ ๋Œ€ํ™”๋ฅผ ์—ฐ๋‹ค.",
    ),
    (
        ("์„ค๋ช…", "์™œ", "์‰ฝ๊ฒŒ", "์˜ˆ์‹œ"),
        "์‚ฌ์šฉ์ž๋Š” ์‰ฌ์šด ์„ค๋ช…์„ ์›ํ•œ๋‹ค.",
        "์งง์€ ์ •์˜์™€ ๊ฐ„๋‹จํ•œ ์˜ˆ์‹œ๋กœ ๋‹ตํ•œ๋‹ค.",
    ),
)

# Returned when no rule in _STATE_PLAN_RULES matches.
_DEFAULT_STATE_PLAN: tuple[str, str] = (
    "์‚ฌ์šฉ์ž๋Š” ๊ฐ€๋ฒผ์šด ์ธ์‚ฌ๋ฅผ ํ–ˆ๋‹ค.",
    "์งง๊ฒŒ ์ธ์‚ฌํ•˜๊ณ  ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋Œ€ํ™”๋ฅผ ์ด์–ด๊ฐ„๋‹ค.",
)


def infer_state_plan(user: str) -> tuple[str, str]:
    """Pick a ``(state, plan)`` text pair for a user message by keyword match.

    The message is stripped of surrounding whitespace and lower-cased, then
    checked against each rule in ``_STATE_PLAN_RULES`` in order. A rule
    matches when any of its keywords occurs as a substring of the normalised
    message; the first matching rule's pair is returned.

    Args:
        user: The raw user message.

    Returns:
        The ``(state, plan)`` pair of the first matching rule, or
        ``_DEFAULT_STATE_PLAN`` when nothing matches (including for an empty
        message).
    """
    normalized = user.strip().lower()
    for keywords, state, plan in _STATE_PLAN_RULES:
        for keyword in keywords:
            if keyword in normalized:
                return state, plan
    return _DEFAULT_STATE_PLAN