"""Text generation runtime with mock and optional llama.cpp backends.""" from __future__ import annotations import json from pathlib import Path from typing import Any from src.config import RuntimeSettings, get_runtime_settings from src.models.schema import DiaryEntry, ObjectUnderstanding, Persona, PersonaEnvelope from src.prompts.diary_generation import ( CHAT_REPLY_PROMPT, DIARY_GENERATION_PROMPT, PERSONA_DIARY_GENERATION_PROMPT, ) from src.prompts.persona_generation import PERSONA_GENERATION_PROMPT from src.utils.json_repair import parse_json_object MODE_PROFILES = { "Cynical": { "mood": "tired but sarcastic", "fear": "being replaced by a newer object with worse opinions", "voice": "dry", }, "Dramatic": { "mood": "theatrical and wounded", "fear": "being forgotten before the final act", "voice": "operatic", }, "Lonely": { "mood": "softly abandoned", "fear": "becoming invisible in plain sight", "voice": "quiet", }, "Philosopher": { "mood": "curious and needlessly profound", "fear": "discovering that usefulness is not meaning", "voice": "reflective", }, "Romantic": { "mood": "hopelessly sentimental", "fear": "loving a human who only sees storage capacity", "voice": "wistful", }, } LLAMA_CPP_BACKENDS = {"llama-cpp", "llama_cpp", "llamacpp"} TEXT_FALLBACK_TO_MOCK = "text-fallback-to-mock" _LLAMA_MODEL: Any | None = None _LLAMA_MODEL_PATH: str | None = None _TEXT_FALLBACKS: list[str] = [] def generate_persona(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope: settings = get_runtime_settings() if _is_llama_cpp_backend(settings): try: return _generate_persona_llama_cpp(object_understanding, mode, settings) except Exception as exc: _log_text_fallback("persona", exc) _add_text_fallback(TEXT_FALLBACK_TO_MOCK) return _generate_persona_mock(object_understanding, mode) def generate_persona_and_diary( object_understanding: ObjectUnderstanding, mode: str, ) -> tuple[PersonaEnvelope, DiaryEntry]: settings = get_runtime_settings() if _is_llama_cpp_backend(settings): try: return _generate_persona_and_diary_llama_cpp(object_understanding, mode, settings) except Exception as exc: _log_text_fallback("persona+diary", exc) _add_text_fallback(TEXT_FALLBACK_TO_MOCK) persona = _generate_persona_mock(object_understanding, mode) return persona, _generate_diary_mock(persona, mode) def generate_diary(persona: PersonaEnvelope, mode: str) -> DiaryEntry: settings = get_runtime_settings() if _is_llama_cpp_backend(settings) and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS: try: return _generate_diary_llama_cpp(persona, mode, settings) except Exception as exc: _log_text_fallback("diary", exc) _add_text_fallback(TEXT_FALLBACK_TO_MOCK) return _generate_diary_mock(persona, mode) def reply_as_object(persona_data: dict, message: str) -> str: settings = get_runtime_settings() if _is_llama_cpp_backend(settings) and TEXT_FALLBACK_TO_MOCK not in _TEXT_FALLBACKS: try: return _reply_as_object_llama_cpp(persona_data, message, settings) except Exception as exc: _log_text_fallback("chat", exc) _add_text_fallback(TEXT_FALLBACK_TO_MOCK) return _reply_as_object_mock(persona_data, message) def reset_text_runtime_fallbacks() -> None: _TEXT_FALLBACKS.clear() def get_text_runtime_fallbacks() -> list[str]: return list(_TEXT_FALLBACKS) def _generate_persona_mock(object_understanding: ObjectUnderstanding, mode: str) -> PersonaEnvelope: object_name = object_understanding.object.name profile = MODE_PROFILES.get(mode, MODE_PROFILES["Cynical"]) character_name = _character_name(object_name, mode) persona = Persona( object_name=object_name, character_name=character_name, mood=profile["mood"], secret_fear=profile["fear"], core_memory=f"survived many quiet hours as a {object_name} while humans called it normal life", complaint=f"I am not just a {object_name}. I am an unpaid witness with excellent recall.", tags=_tags_for_mode(mode), ) return PersonaEnvelope(persona=persona) def _generate_diary_mock(persona: PersonaEnvelope, mode: str) -> DiaryEntry: p = persona.persona day_number = 417 + len(p.object_name) english = ( f"They touched me again today with the confidence of someone who has never asked " f"a {p.object_name} for consent. I remained still, because that is my contract with gravity. " f"My mood is {p.mood}, my secret fear is {p.secret_fear}, and my only comfort is knowing " "I have outlived at least three urgent plans." ) chinese = ( f"今天他们又理所当然地碰了我,好像一个 {p.object_name} 不会有边界感。" f"我保持沉默,因为这大概是我和重力签下的合同。我的情绪是 {p.mood}," f"秘密恐惧是 {p.secret_fear}。至少,我已经熬过了好几个所谓紧急计划。" ) return DiaryEntry( title=f"Secret Diary - Day {day_number}", english=english, chinese=chinese, ) def _reply_as_object_mock(persona_data: dict, message: str) -> str: persona = persona_data.get("persona", {}) character_name = persona.get("character_name", "The Object") object_name = persona.get("object_name", "object") mood = persona.get("mood", "suspicious") complaint = persona.get("complaint", "I have seen enough.") clean_message = message.strip() or "..." return ( f"{character_name}: You ask me about '{clean_message}', as if a {object_name} " f"with a {mood} mood has unlimited office hours. {complaint}" ) def _generate_persona_llama_cpp( object_understanding: ObjectUnderstanding, mode: str, settings: RuntimeSettings, ) -> PersonaEnvelope: raw = _run_llama_json( system_prompt=PERSONA_GENERATION_PROMPT, user_payload={ "mode": mode, "object_understanding": object_understanding.model_dump(mode="json"), }, settings=settings, max_tokens=320, ) return PersonaEnvelope.model_validate(raw) def _generate_persona_and_diary_llama_cpp( object_understanding: ObjectUnderstanding, mode: str, settings: RuntimeSettings, ) -> tuple[PersonaEnvelope, DiaryEntry]: raw = _run_llama_json( system_prompt=PERSONA_DIARY_GENERATION_PROMPT, user_payload={ "mode": mode, "object_understanding": object_understanding.model_dump(mode="json"), }, settings=settings, max_tokens=1024, ) persona = PersonaEnvelope.model_validate({"persona": raw.get("persona")}) diary = DiaryEntry.model_validate(raw.get("diary")) return persona, diary def _generate_diary_llama_cpp( persona: PersonaEnvelope, mode: str, settings: RuntimeSettings, ) -> DiaryEntry: raw = _run_llama_json( system_prompt=DIARY_GENERATION_PROMPT, user_payload={ "mode": mode, "persona": persona.model_dump(mode="json"), }, settings=settings, max_tokens=360, ) return DiaryEntry.model_validate(raw) def _reply_as_object_llama_cpp( persona_data: dict, message: str, settings: RuntimeSettings, ) -> str: PersonaEnvelope.model_validate(persona_data) raw = _run_llama_json( system_prompt=CHAT_REPLY_PROMPT, user_payload={ "persona": persona_data, "message": message.strip() or "...", }, settings=settings, max_tokens=180, ) reply = raw.get("reply") if not isinstance(reply, str) or not reply.strip(): raise ValueError("llama.cpp chat response did not include a non-empty reply.") return reply.strip() def _run_llama_json( *, system_prompt: str, user_payload: dict[str, Any], settings: RuntimeSettings, max_tokens: int, ) -> dict[str, Any]: model = _load_llama_model(settings.text_model_path, settings=settings) user_content = json.dumps(user_payload, ensure_ascii=False, indent=2) raw = _complete_llama( model, system_prompt=system_prompt, user_content=user_content, max_tokens=max_tokens, ) return parse_json_object(raw) def _complete_llama( model: Any, *, system_prompt: str, user_content: str, max_tokens: int, ) -> str: stop = ["", "<|end|>", "<|eot_id|>", "<|im_end|>"] if hasattr(model, "create_chat_completion"): response = model.create_chat_completion( messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}, ], temperature=0.2, top_p=0.9, max_tokens=max_tokens, stop=stop, ) return _extract_completion_text(response) prompt = f"System:\n{system_prompt}\n\nUser:\n{user_content}\n\nAssistant JSON:\n" response = model( prompt, temperature=0.2, top_p=0.9, max_tokens=max_tokens, stop=stop, ) return _extract_completion_text(response) def _extract_completion_text(response: Any) -> str: if isinstance(response, str): return response if not isinstance(response, dict): raise ValueError("llama.cpp returned an unsupported response type.") choices = response.get("choices") if not isinstance(choices, list) or not choices: raise ValueError("llama.cpp response did not include choices.") first = choices[0] if not isinstance(first, dict): raise ValueError("llama.cpp response choice was not an object.") message = first.get("message") if isinstance(message, dict) and isinstance(message.get("content"), str): return message["content"] if isinstance(first.get("text"), str): return first["text"] raise ValueError("llama.cpp response did not include text content.") def _load_llama_model(text_model_path: str, *, settings: RuntimeSettings | None = None) -> Any: global _LLAMA_MODEL, _LLAMA_MODEL_PATH clean_path = _resolve_text_model_path(text_model_path, settings) if not Path(clean_path).exists(): raise FileNotFoundError(f"TEXT_MODEL_PATH does not exist: {clean_path}") if _LLAMA_MODEL is not None and _LLAMA_MODEL_PATH == clean_path: return _LLAMA_MODEL from llama_cpp import Llama _LLAMA_MODEL = Llama( model_path=clean_path, n_ctx=2048, verbose=False, ) _LLAMA_MODEL_PATH = clean_path return _LLAMA_MODEL def _resolve_text_model_path( text_model_path: str, settings: RuntimeSettings | None = None, ) -> str: clean_path = text_model_path.strip() if clean_path: return clean_path current = settings or get_runtime_settings() if current.text_model_repo_id.strip() and current.text_model_filename.strip(): return _download_hf_gguf(current) raise ValueError( "TEXT_MODEL_PATH is not configured, and TEXT_MODEL_REPO_ID/TEXT_MODEL_FILENAME " "are not configured." ) def _download_hf_gguf(settings: RuntimeSettings) -> str: from huggingface_hub import hf_hub_download kwargs: dict[str, str] = { "repo_id": settings.text_model_repo_id.strip(), "filename": settings.text_model_filename.strip(), "repo_type": "model", } revision = settings.text_model_revision.strip() if revision: kwargs["revision"] = revision return hf_hub_download(**kwargs) def _is_llama_cpp_backend(settings: RuntimeSettings) -> bool: return settings.text_backend.strip().lower() in LLAMA_CPP_BACKENDS def _add_text_fallback(marker: str) -> None: if marker not in _TEXT_FALLBACKS: _TEXT_FALLBACKS.append(marker) def _log_text_fallback(stage: str, exc: Exception) -> None: print( f"[Objectverse Diary] Text runtime fell back to mock during {stage}: {type(exc).__name__}", flush=True, ) def _character_name(object_name: str, mode: str) -> str: compact = "".join(part.capitalize() for part in object_name.split()[:2]) suffix = { "Cynical": "worth", "Dramatic": "von Sigh", "Lonely": "Afterlight", "Philosopher": "the Questioning", "Romantic": "de Moon", }.get(mode, "worth") return f"{compact} {suffix}".strip() def _tags_for_mode(mode: str) -> list[str]: return { "Cynical": ["desk survivor", "burnt optimism", "quiet judgment"], "Dramatic": ["tragic prop", "grand entrance", "minor catastrophe"], "Lonely": ["forgotten corner", "soft echo", "dust companion"], "Philosopher": ["tiny ontology", "useful doubt", "meaning crisis"], "Romantic": ["tender witness", "hopeless glow", "secret devotion"], }.get(mode, ["odd witness", "secret life", "object soul"])