Spaces:

build-small-hackathon
/

ObjectverseDiary

Running on Zero

File size: 13,235 Bytes

"""Text generation runtime with mock and optional llama.cpp backends."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from src.config import RuntimeSettings, get_runtime_settings
from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope
from src.prompts.diary_generation import (
    CHAT_REPLY_PROMPT,
    DIARY_GENERATION_PROMPT,
    PERSONA_DIARY_GENERATION_PROMPT,
)
from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT
from src.utils.json_repair import parse_json_object


# Personality presets keyed by diary mode name. `_generate_persona_mock` reads
# the "mood" and "fear" entries of the selected profile and uses the "Cynical"
# profile for any mode that is not listed here. The "voice" entry is not read
# anywhere in the visible part of this module.
MODE_PROFILES = {
    "Cynical": {
        "mood": "tired but sarcastic",
        "fear": "being replaced by a newer object with worse opinions",
        "voice": "dry",
    },
    "Dramatic": {
        "mood": "theatrical and wounded",
        "fear": "being forgotten before the final act",
        "voice": "operatic",
    },
    "Lonely": {
        "mood": "softly abandoned",
        "fear": "becoming invisible in plain sight",
        "voice": "quiet",
    },
    "Philosopher": {
        "mood": "curious and needlessly profound",
        "fear": "discovering that usefulness is not meaning",
        "voice": "reflective",
    },
    "Romantic": {
        "mood": "hopelessly sentimental",
        "fear": "loving a human who only sees storage capacity",
        "voice": "wistful",
    },
}

# Backend names (compared after strip + lower-case) that select llama.cpp.
LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"}
# Marker stored in `_TEXT_FALLBACKS` when a llama.cpp call failed and the mock was used.
TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock"

# Module-level cache: the loaded llama.cpp model and the resolved path it came from.
_LLAMA_MODEL: Any | None = None
_LLAMA_MODEL_PATH: str | None = None
# Fallback markers recorded since the last `reset_text_runtime_fallbacks()` call.
_TEXT_FALLBACKS: list[str] = []


def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Create a persona for the given object understanding and mode.

    When the runtime settings select the llama.cpp backend, the model is
    tried first. Any exception from that path is logged, recorded as a
    fallback marker, and answered with the mock persona instead.

    Args:
        object_understanding: Structured description of the object.
        mode: Personality mode name (see ``MODE_PROFILES``).

    Returns:
        The generated persona envelope.
    """
    settings = get_runtime_settings()
    if not _is_llama_cpp_backend(settings):
        return _generate_persona_mock(object_understanding, mode)

    try:
        envelope = _generate_persona_llama_cpp(object_understanding, mode, settings)
    except Exception as exc:
        # Deliberate best-effort boundary: never let a model failure reach the caller.
        _log_text_fallback("persona", exc)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        return _generate_persona_mock(object_understanding, mode)
    return envelope


def generate_persona_and_diary(
    object_understanding: ObjectUnderstanding,
    mode: str,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Create a persona and its diary entry in a single step.

    With the llama.cpp backend selected, both are requested from the model in
    one call. If that call raises, the failure is logged, a fallback marker is
    recorded, and both values are produced by the mock generators.

    Args:
        object_understanding: Structured description of the object.
        mode: Personality mode name.

    Returns:
        A ``(persona, diary)`` pair.
    """
    settings = get_runtime_settings()
    if _is_llama_cpp_backend(settings):
        try:
            generated = _generate_persona_and_diary_llama_cpp(object_understanding, mode, settings)
        except Exception as exc:
            # Deliberate best-effort boundary: fall through to the mock path below.
            _log_text_fallback("persona+diary", exc)
            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        else:
            return generated

    mock_persona = _generate_persona_mock(object_understanding, mode)
    mock_diary = _generate_diary_mock(mock_persona, mode)
    return mock_persona, mock_diary


def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Write a diary entry for an existing persona.

    The llama.cpp backend is used only when it is selected and no
    mock-fallback marker has been recorded yet; once a fallback has happened
    the model is skipped until the markers are reset. A failure here is
    logged, recorded, and answered with the mock diary.

    Args:
        persona: Persona envelope the diary is written for.
        mode: Personality mode name.

    Returns:
        The generated diary entry.
    """
    settings = get_runtime_settings()
    llama_selected = _is_llama_cpp_backend(settings)
    if llama_selected and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS:
        try:
            entry = _generate_diary_llama_cpp(persona, mode, settings)
        except Exception as exc:
            # Deliberate best-effort boundary: fall through to the mock diary.
            _log_text_fallback("diary", exc)
            _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        else:
            return entry

    return _generate_diary_mock(persona, mode)


def reply_as_object(persona_data: dict, message: str) -> str:
    """Answer a chat message in the voice of the given persona.

    The mock reply is used directly when llama.cpp is not the selected
    backend or a mock-fallback marker is already recorded. Otherwise the model
    is tried, and any exception is logged, recorded, and answered with the
    mock reply.

    Args:
        persona_data: Persona envelope as a plain dict.
        message: The user's chat message.

    Returns:
        The reply text.
    """
    settings = get_runtime_settings()
    if not _is_llama_cpp_backend(settings) or TEXT_FALLBACK_TO_MOCK in _TEXT_FALLBACKS:
        return _reply_as_object_mock(persona_data, message)

    try:
        return _reply_as_object_llama_cpp(persona_data, message, settings)
    except Exception as exc:
        # Deliberate best-effort boundary: the chat must always get an answer.
        _log_text_fallback("chat", exc)
        _add_text_fallback(TEXT_FALLBACK_TO_MOCK)
        return _reply_as_object_mock(persona_data, message)


def reset_text_runtime_fallbacks() -> None:
    """Forget all recorded fallback markers, emptying the shared list in place."""
    del _TEXT_FALLBACKS[:]


def get_text_runtime_fallbacks() -> list[str]:
    """Return a copy of the recorded fallback markers.

    A new list is returned so callers cannot mutate the module-level state.
    """
    return [*_TEXT_FALLBACKS]


def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope:
    """Build a deterministic persona without calling any model.

    Mood and secret fear come from ``MODE_PROFILES``; the character name and
    tags come from the mode-specific helpers; the remaining text is templated
    around the object's name.

    Args:
        object_understanding: Source of the object name (``.object.name``).
        mode: Personality mode name; unknown modes use the "Cynical" profile.

    Returns:
        A persona envelope wrapping the templated persona.
    """
    object_name = object_understanding.object.name
    # Unknown modes borrow the "Cynical" preset.
    preset = MODE_PROFILES.get(mode, MODE_PROFILES["Cynical"])

    return PersonaEnvelope(
        persona=Persona(
            object_name=object_name,
            character_name=_character_name(object_name, mode),
            mood=preset["mood"],
            secret_fear=preset["fear"],
            core_memory=f"survived many quiet hours as a {object_name} while humans called it normal life",
            complaint=f"I am not just a {object_name}. I am an unpaid witness with excellent recall.",
            tags=_tags_for_mode(mode),
        )
    )


def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry:
    """Build a templated bilingual diary entry without calling any model.

    The title's day number is 417 plus the length of the object name. The
    English and Chinese bodies interpolate the persona's object name, mood and
    secret fear.

    Args:
        persona: Envelope whose ``persona`` supplies the interpolated fields.
        mode: Not used by the mock.

    Returns:
        The templated diary entry.
    """
    p = persona.persona

    chinese = (
        f"今天他们又理所当然地碰了我，好像一个 {p.object_name} 不会有边界感。"
        f"我保持沉默，因为这大概是我和重力签下的合同。我的情绪是 {p.mood}，"
        f"秘密恐惧是 {p.secret_fear}。至少，我已经熬过了好几个所谓紧急计划。"
    )
    english = (
        f"They touched me again today with the confidence of someone who has never asked "
        f"a {p.object_name} for consent. I remained still, because that is my contract with gravity. "
        f"My mood is {p.mood}, my secret fear is {p.secret_fear}, and my only comfort is knowing "
        "I have outlived at least three urgent plans."
    )

    # The day counter varies with the object name's length.
    day_number = len(p.object_name) + 417
    title = f"Secret Diary - Day {day_number}"
    return DiaryEntry(title=title, english=english, chinese=chinese)


def _reply_as_object_mock(persona_data: dict, message: str) -> str:
    """Build a templated in-character reply without calling any model.

    This is the fallback ``reply_as_object`` uses after the llama.cpp path
    fails, which includes the case where the persona data did not validate.
    It therefore tolerates malformed input instead of raising: a missing,
    ``None`` or non-dict ``persona`` (or ``persona_data``) is treated as an
    empty persona, and a non-string message is treated as empty.

    Args:
        persona_data: Persona envelope as a plain dict; the ``"persona"`` entry
            supplies ``character_name``, ``object_name``, ``mood`` and
            ``complaint``, each with a built-in default when absent.
        message: The user's chat message; blank messages become ``"..."``.

    Returns:
        The reply text, prefixed with the character name.
    """
    persona = persona_data.get("persona") if isinstance(persona_data, dict) else None
    if not isinstance(persona, dict):
        # Covers {"persona": None}, a non-dict persona, and a non-dict envelope.
        persona = {}

    character_name = persona.get("character_name", "The Object")
    object_name = persona.get("object_name", "object")
    mood = persona.get("mood", "suspicious")
    complaint = persona.get("complaint", "I have seen enough.")

    raw_message = message if isinstance(message, str) else ""
    clean_message = raw_message.strip() or "..."

    return (
        f"{character_name}: You ask me about '{clean_message}', as if a {object_name} "
        f"with a {mood} mood has unlimited office hours. {complaint}"
    )


def _generate_persona_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> PersonaEnvelope:
    """Ask the llama.cpp model for a persona and validate the result.

    Args:
        object_understanding: Serialised with ``model_dump(mode="json")`` into the payload.
        mode: Personality mode name sent alongside the object data.
        settings: Runtime settings used to locate and load the model.

    Returns:
        The parsed JSON validated through ``PersonaEnvelope.model_validate``.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=320,
    )
    return PersonaEnvelope.model_validate(parsed)


def _generate_persona_and_diary_llama_cpp(
    object_understanding: ObjectUnderstanding,
    mode: str,
    settings: RuntimeSettings,
) -> tuple[PersonaEnvelope, DiaryEntry]:
    """Ask the llama.cpp model for a persona and a diary in one completion.

    The parsed JSON is expected to carry ``"persona"`` and ``"diary"`` entries;
    each is validated separately, so a missing entry surfaces as a validation
    error from the corresponding model.

    Args:
        object_understanding: Serialised with ``model_dump(mode="json")`` into the payload.
        mode: Personality mode name sent alongside the object data.
        settings: Runtime settings used to locate and load the model.

    Returns:
        A ``(persona, diary)`` pair built from the model output.
    """
    payload = {
        "mode": mode,
        "object_understanding": object_understanding.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=PERSONA_DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=1024,
    )
    # The persona is re-wrapped so it matches the envelope shape before validation.
    return (
        PersonaEnvelope.model_validate({"persona": parsed.get("persona")}),
        DiaryEntry.model_validate(parsed.get("diary")),
    )


def _generate_diary_llama_cpp(
    persona: PersonaEnvelope,
    mode: str,
    settings: RuntimeSettings,
) -> DiaryEntry:
    """Ask the llama.cpp model for a diary entry and validate the result.

    Args:
        persona: Serialised with ``model_dump(mode="json")`` into the payload.
        mode: Personality mode name sent alongside the persona.
        settings: Runtime settings used to locate and load the model.

    Returns:
        The parsed JSON validated through ``DiaryEntry.model_validate``.
    """
    payload = {
        "mode": mode,
        "persona": persona.model_dump(mode="json"),
    }
    parsed = _run_llama_json(
        system_prompt=DIARY_GENERATION_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=360,
    )
    return DiaryEntry.model_validate(parsed)


def _reply_as_object_llama_cpp(
    persona_data: dict,
    message: str,
    settings: RuntimeSettings,
) -> str:
    """Ask the llama.cpp model for an in-character chat reply.

    Args:
        persona_data: Persona envelope as a plain dict; validated before use.
        message: The user's chat message; blank messages are sent as ``"..."``.
        settings: Runtime settings used to locate and load the model.

    Returns:
        The stripped ``"reply"`` string from the model's JSON output.

    Raises:
        ValueError: If the output has no non-empty string under ``"reply"``.
    """
    # Validation only: the result is discarded, but invalid persona data raises here.
    PersonaEnvelope.model_validate(persona_data)

    payload = {
        "persona": persona_data,
        "message": message.strip() or "...",
    }
    parsed = _run_llama_json(
        system_prompt=CHAT_REPLY_PROMPT,
        user_payload=payload,
        settings=settings,
        max_tokens=180,
    )

    reply = parsed.get("reply")
    if isinstance(reply, str):
        trimmed = reply.strip()
        if trimmed:
            return trimmed
    raise ValueError("llama.cpp chat response did not include a non-empty reply.")


def _run_llama_json(
    *,
    system_prompt: str,
    user_payload: dict[str, Any],
    settings: RuntimeSettings,
    max_tokens: int,
) -> dict[str, Any]:
    """Run one completion and parse its output as a JSON object.

    Args:
        system_prompt: Instruction text sent as the system message.
        user_payload: Data serialised to indented, non-ASCII-escaped JSON and
            sent as the user message.
        settings: Runtime settings used to locate and load the model.
        max_tokens: Completion length limit.

    Returns:
        The object produced by ``parse_json_object`` from the raw completion.
    """
    # Load the model first so loading errors surface before the payload is serialised.
    llm = _load_llama_model(settings.text_model_path, settings=settings)
    completion = _complete_llama(
        llm,
        system_prompt=system_prompt,
        user_content=json.dumps(user_payload, ensure_ascii=False, indent=2),
        max_tokens=max_tokens,
    )
    return parse_json_object(completion)


def _complete_llama(
    model: Any,
    *,
    system_prompt: str,
    user_content: str,
    max_tokens: int,
) -> str:
    """Run a single completion on ``model`` and return its text.

    Models exposing ``create_chat_completion`` receive system and user
    messages; any other model is called directly with a flattened text prompt.
    Both paths share the same sampling parameters and stop sequences.

    Args:
        model: The loaded model object.
        system_prompt: Instruction text.
        user_content: User message text.
        max_tokens: Completion length limit.

    Returns:
        The text extracted from the model's response.
    """
    sampling = {
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": max_tokens,
        "stop": ["</s>", "<|end|>", "<|eot_id|>", "<|im_end|>"],
    }

    if not hasattr(model, "create_chat_completion"):
        # No chat API: fall back to a plain completion over a hand-built prompt.
        prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n"
        return _extract_completion_text(model(prompt, **sampling))

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    return _extract_completion_text(
        model.create_chat_completion(messages=messages, **sampling)
    )


def _extract_completion_text(response: Any) -> str:
    """Pull the generated text out of a completion response.

    A plain string is returned as is. A dict must carry a non-empty
    ``"choices"`` list whose first item is a dict; the text is taken from
    ``message.content`` when that is a string, otherwise from ``text``.

    Args:
        response: The raw value returned by the model call.

    Returns:
        The generated text.

    Raises:
        ValueError: If the response has an unsupported type or shape.
    """
    if isinstance(response, str):
        return response
    if not isinstance(response, dict):
        raise ValueError("llama.cpp returned an unsupported response type.")

    choices = response.get("choices")
    if not (isinstance(choices, list) and choices):
        raise ValueError("llama.cpp response did not include choices.")

    head = choices[0]
    if not isinstance(head, dict):
        raise ValueError("llama.cpp response choice was not an object.")

    # Chat-style responses nest the text under message.content.
    message = head.get("message")
    content = message.get("content") if isinstance(message, dict) else None
    if isinstance(content, str):
        return content

    # Plain completions expose it directly as "text".
    text = head.get("text")
    if isinstance(text, str):
        return text

    raise ValueError("llama.cpp response did not include text content.")


def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any:
    """Return the llama.cpp model for the resolved path, loading it if needed.

    The model is cached at module level together with the path it was loaded
    from; a call that resolves to the same path reuses the cached instance,
    while a different path loads a new model and replaces the cache.

    Args:
        text_model_path: Configured model path; may be blank, in which case the
            path is resolved from ``settings``.
        settings: Optional runtime settings forwarded to path resolution.

    Returns:
        The loaded ``Llama`` instance.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    global _LLAMA_MODEL, _LLAMA_MODEL_PATH

    clean_path = _resolve_text_model_path(text_model_path, settings)
    if not Path(clean_path).exists():
        raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {clean_path}")

    cached = _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == clean_path
    if not cached:
        # Imported lazily so the module can be used without llama_cpp installed.
        from llama_cpp import Llama

        _LLAMA_MODEL = Llama(model_path=clean_path, n_ctx=2048, verbose=False)
        _LLAMA_MODEL_PATH = clean_path
    return _LLAMA_MODEL


def _resolve_text_model_path(
    text_model_path: str,
    settings: RuntimeSettings | None = None,
) -> str:
    """Work out which model file path to load.

    A non-blank ``text_model_path`` is returned stripped. Otherwise, when both
    the repo id and filename settings are non-blank, the path returned by
    ``_download_hf_gguf`` is used.

    Args:
        text_model_path: Explicitly configured path; may be blank.
        settings: Runtime settings; fetched via ``get_runtime_settings()`` when
            not supplied.

    Returns:
        The path of the model file.

    Raises:
        ValueError: If neither a path nor a repo id/filename pair is configured.
    """
    explicit_path = text_model_path.strip()
    if explicit_path:
        return explicit_path

    current = settings or get_runtime_settings()
    repo_configured = current.text_model_repo_id.strip() and current.text_model_filename.strip()
    if not repo_configured:
        raise ValueError(
            "TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME "
            "are not configured."
        )
    return _download_hf_gguf(current)


def _download_hf_gguf(settings: RuntimeSettings) -> str:
    """Fetch the configured model file through ``hf_hub_download``.

    Args:
        settings: Supplies the repo id, filename and optional revision; each is
            stripped before use, and a blank revision is left out of the call.

    Returns:
        The value returned by ``hf_hub_download``.
    """
    # Imported lazily so huggingface_hub is only needed when a download is requested.
    from huggingface_hub import hf_hub_download

    download_args: dict[str, str] = dict(
        repo_id=settings.text_model_repo_id.strip(),
        filename=settings.text_model_filename.strip(),
        repo_type="model",
    )
    pinned_revision = settings.text_model_revision.strip()
    if pinned_revision:
        download_args.update(revision=pinned_revision)
    return hf_hub_download(**download_args)


def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool:
    """Tell whether the configured text backend names llama.cpp.

    The setting is compared after stripping whitespace and lower-casing.
    """
    backend = settings.text_backend.strip().lower()
    return backend in LLAMA_CPP_BACKENDS


def _add_text_fallback(marker: str) -> None:
    """Record ``marker`` in the fallback list unless it is already present."""
    if marker in _TEXT_FALLBACKS:
        return
    _TEXT_FALLBACKS.append(marker)


def _log_text_fallback(stage: str, exc: Exception) -> None:
    """Print a one-line notice that ``stage`` fell back to the mock backend.

    Only the exception's class name is printed, not its message. Output is
    flushed immediately.

    Args:
        stage: Short label of the generation step that failed.
        exc: The exception that triggered the fallback.
    """
    notice = f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {type(exc).__name__}"
    print(notice, flush=True)


def _character_name(object_name: str, mode: str) -> str:
    """Derive a character name from the object name and the mode.

    The first two whitespace-separated words of ``object_name`` are
    capitalised and joined without a space, then followed by a space and a
    mode-specific suffix. Unknown modes use the "Cynical" suffix. Surrounding
    whitespace is stripped, so an empty object name yields just the suffix.

    Args:
        object_name: Name of the object.
        mode: Personality mode name.

    Returns:
        The character name.
    """
    suffix_by_mode = {
        "Cynical": "worth",
        "Dramatic": "von Sigh",
        "Lonely": "Afterlight",
        "Philosopher": "the Questioning",
        "Romantic": "de Moon",
    }
    leading_words = object_name.split()[:2]
    compact = "".join(map(str.capitalize, leading_words))
    suffix = suffix_by_mode.get(mode, "worth")
    return f"{compact} {suffix}".strip()


def _tags_for_mode(mode: str) -> list[str]:
    """Return the three descriptive tags for ``mode``.

    Unknown modes get a generic tag set. The lists are rebuilt on every call,
    so callers may mutate the result freely.

    Args:
        mode: Personality mode name.

    Returns:
        A new list of three tag strings.
    """
    tags_by_mode = {
        "Cynical": ["desk survivor", "burnt optimism", "quiet judgment"],
        "Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"],
        "Lonely": ["forgotten corner", "soft echo", "dust companion"],
        "Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"],
        "Romantic": ["tender witness", "hopeless glow", "secret devotion"],
    }
    try:
        return tags_by_mode[mode]
    except KeyError:
        return ["odd witness", "secret life", "object soul"]