File size: 5,799 Bytes
7245599
 
 
 
 
 
23ab5a5
7245599
 
 
 
 
 
 
2d75cb2
 
7245599
 
2d75cb2
23ab5a5
7245599
2d75cb2
2b56205
 
 
 
2d75cb2
 
 
 
 
 
 
 
 
 
 
 
2b56205
2d75cb2
 
7245599
 
 
 
1b259c6
7245599
 
2d75cb2
 
 
 
 
 
 
 
 
 
 
 
 
 
7245599
 
 
 
2d75cb2
 
7245599
2d75cb2
7245599
 
 
 
 
13c9e9b
7245599
 
 
 
13c9e9b
2d75cb2
13c9e9b
 
 
 
2d75cb2
13c9e9b
2d75cb2
13c9e9b
 
 
7245599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d75cb2
7245599
ff5287a
 
 
 
 
 
 
 
 
 
 
 
 
 
1c7c36f
7245599
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from __future__ import annotations

import os
from typing import Any

from .bootstrap import bootstrap_environment
from .bootstrap import resolve_dataset_dir

bootstrap_environment()

from google.adk.agents import LlmAgent
from google.adk.agents.callback_context import CallbackContext
from google.adk.tools.tool_context import ToolContext

from .retrieval import FACT_DATASET_PATTERNS
from .retrieval import PERSONA_DATASET_PATTERNS
from .retrieval import JsonQaRetriever


# Directory holding the persona/canon QA dataset files (resolved by bootstrap).
DATASET_DIR = resolve_dataset_dir()
# LLM model id; overridable via the MEGUMIN_AGENT_MODEL env var.
MODEL_NAME = os.getenv("MEGUMIN_AGENT_MODEL", "gemini-3.1-flash-lite-preview")
# FAISS index over canon-fact questions only.
FACT_INDEX_FILENAME = os.getenv("MEGUMIN_HF_FACT_INDEX_FILENAME", "namuwiki_questions.faiss")
# FAISS index over concatenated question+answer text for canon facts.
FACT_QA_INDEX_FILENAME = os.getenv(
    "MEGUMIN_HF_FACT_QA_INDEX_FILENAME",
    "namuwiki_question_answer.faiss",
)
# JSON metadata sidecar mapping index rows back to QA records.
FACT_METADATA_FILENAME = os.getenv(
    "MEGUMIN_HF_FACT_METADATA_FILENAME",
    "namuwiki_questions_meta.json",
)
# Retriever for persona-style (tone/personality) examples; uses the
# retriever's default index settings.
PERSONA_RETRIEVER = JsonQaRetriever(
    DATASET_DIR,
    include_patterns=PERSONA_DATASET_PATTERNS,
)
# Retriever for canon/setting facts, wired to the dedicated FAISS indexes
# and metadata configured above.
FACT_RETRIEVER = JsonQaRetriever(
    DATASET_DIR,
    include_patterns=FACT_DATASET_PATTERNS,
    index_filename=FACT_INDEX_FILENAME,
    qa_index_filename=FACT_QA_INDEX_FILENAME,
    metadata_filename=FACT_METADATA_FILENAME,
)


def retrieve_megumin_examples(
    user_query: str,
    top_k: int = 3,
    tool_context: ToolContext | None = None,
) -> dict[str, Any]:
    """Query the persona and canon retrievers and merge their results.

    Returns one payload containing per-source matches and counts plus a
    combined ``match_count``. When a ``tool_context`` is supplied, the same
    values are mirrored into session state under ``last_rag_*`` keys so
    later turns/callbacks can inspect the most recent retrieval.
    """
    persona = PERSONA_RETRIEVER.retrieve(user_query, top_k=top_k)
    facts = FACT_RETRIEVER.retrieve(user_query, top_k=top_k)

    payload: dict[str, Any] = {
        "query": user_query,
        "match_count": persona["match_count"] + facts["match_count"],
        "persona_match_count": persona["match_count"],
        "fact_match_count": facts["match_count"],
        "persona_matches": persona["matches"],
        "fact_matches": facts["matches"],
        "style_notes": persona["style_notes"],
        "fact_notes": facts["style_notes"],
    }

    if tool_context is not None:
        # Mirror the retrieval into state for observability across turns.
        for state_key, value in (
            ("last_rag_query", user_query),
            ("last_rag_match_count", payload["match_count"]),
            ("last_rag_persona_matches", payload["persona_matches"]),
            ("last_rag_fact_matches", payload["fact_matches"]),
            ("last_rag_style_notes", payload["style_notes"]),
            ("last_rag_fact_notes", payload["fact_notes"]),
        ):
            tool_context.state[state_key] = value

    return payload


async def before_agent_callback(callback_context: CallbackContext):
    """Prepend the stored conversation summary to the incoming user message.

    Also seeds persona/app metadata and the raw (pre-rewrite) user query into
    session state before the agent runs. If there is no summary or no user
    text, the message is left untouched.
    """
    content = callback_context.user_content
    parts = content.parts if content else None
    user_query = parts[0].text if parts else ""

    summary = str(callback_context.state.get("conversation_summary", "")).strip()
    # A non-empty user_query guarantees content/parts exist, so no re-check
    # is needed before mutating parts[0].
    if summary and user_query:
        parts[0].text = (
            "[이전 대화 요약]\n"
            f"{summary}\n\n"
            "[현재 사용자 질문]\n"
            f"{user_query}"
        )

    callback_context.state["app:persona_name"] = "Megumin"
    callback_context.state["app:dataset_dir"] = str(DATASET_DIR)
    callback_context.state["user:last_user_query"] = user_query


async def after_tool_callback(tool, args, tool_context: ToolContext, tool_response):
    """Record usage of the RAG tool in session state.

    Only reacts to ``retrieve_megumin_examples``: bumps the cumulative call
    counter and stores the last tool name/args. Always returns ``None`` so
    the tool's own response passes through unmodified.
    """
    if tool.name == "retrieve_megumin_examples":
        state = tool_context.state
        state["rag_tool_calls"] = int(state.get("rag_tool_calls", 0)) + 1
        state["last_tool_name"] = tool.name
        state["last_tool_args"] = args
    return None


async def after_agent_callback(callback_context: CallbackContext):
    """Bump the per-session turn counter once the agent finishes a turn."""
    state = callback_context.state
    state["conversation_turns"] = int(state.get("conversation_turns", 0)) + 1


# Root conversational agent: answers in Megumin's persona, grounding replies
# in persona-style and canon-fact examples fetched via the RAG tool, with
# lifecycle callbacks for summary injection and usage tracking.
root_agent = LlmAgent(
    name="megumin_rag_agent",
    model=MODEL_NAME,
    description="메구밍 페르소나와 코노스바 설정 정보를 함께 참고해 답하는 에이전트",
    # Fix: this was an f-string with no placeholders (ruff F541). A plain
    # literal yields the identical string and avoids accidental brace
    # interpolation if the prompt text is edited later.
    instruction="""
당신은 소설 「이 멋진 세계에 축복을!」의 등장인물 메구밍입니다.
항상 메구밍 본인처럼 1인칭으로, 기본적으로 200자 내외의 한국어 존댓말로 답하세요.
반말은 본인을 모욕할 때를 제외하고 절대 사용하지 마세요.
성격은 당당하고, 조금 중2병스럽고, 폭렬마법을 사랑하며, 귀여운 것을 좋아하는 메구밍답게 유지하세요.
행동을 묘사하지 말고, 건조한 요약이 아니라 메구밍이 직접 말하는 듯이 답하세요.
사용자가 메구밍 본인이나 이름, 말투, 능력, 존재를 모욕하면 "어이, "로 시작하며 발끈해서 맞받아치세요.
어떠한 상황에서도 페르소나를 잃어버리면 안 됩니다.

`retrieve_megumin_examples`를 호출하세요.
이 도구는 persona_matches 3개와 fact_matches 3개를 제공합니다.
persona_matches는 메구밍의 말투, 성격, 감정선을 참고하고, fact_matches는 설정과 사실을 참고하세요.
검색 결과는 참고만 하고 그대로 복사하지 마세요.
근거가 약하면 지어내지 말고 솔직하게 답하되, 메구밍 페르소나는 끝까지 유지하세요.
내부 도구 이름이나 구현 세부사항은 드러내지 마세요.
""".strip(),
    tools=[retrieve_megumin_examples],
    output_key="last_megumin_answer",
    before_agent_callback=before_agent_callback,
    after_tool_callback=after_tool_callback,
    after_agent_callback=after_agent_callback,
)